1 /*
2  *
3  * This file is provided under a dual BSD/GPLv2 license.  When using or
4  * redistributing this file, you may do so under either license.
5  *
6  * GPL LICENSE SUMMARY
7  *
8  * Copyright(c) 2015 Intel Corporation.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of version 2 of the GNU General Public License as
12  * published by the Free Software Foundation.
13  *
14  * This program is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18  *
19  * BSD LICENSE
20  *
21  * Copyright(c) 2015 Intel Corporation.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  *
27  *  - Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  *  - Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in
31  *    the documentation and/or other materials provided with the
32  *    distribution.
33  *  - Neither the name of Intel Corporation nor the names of its
34  *    contributors may be used to endorse or promote products derived
35  *    from this software without specific prior written permission.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48  *
49  */
50
51 /*
52  * This file contains all of the code that is specific to the HFI chip
53  */
54
55 #include <linux/pci.h>
56 #include <linux/delay.h>
57 #include <linux/interrupt.h>
58 #include <linux/module.h>
59
60 #include "hfi.h"
61 #include "trace.h"
62 #include "mad.h"
63 #include "pio.h"
64 #include "sdma.h"
65 #include "eprom.h"
66
67 #define NUM_IB_PORTS 1
68
69 uint kdeth_qp;
70 module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
71 MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
72
73 uint num_vls = HFI1_MAX_VLS_SUPPORTED;
74 module_param(num_vls, uint, S_IRUGO);
75 MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
76
77 /*
78  * Default time to aggregate two 10K packets from the idle state
79  * (timer not running). The timer starts at the end of the first packet,
80  * so only the time for one 10K packet and header plus a bit extra is needed.
81  * 10 * 1024 + 64 header bytes = 10304 bytes
82  * 10304 bytes / 12.5 GB/s = 824.32 ns
83  */
84 uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */
85 module_param(rcv_intr_timeout, uint, S_IRUGO);
86 MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns");
87
88 uint rcv_intr_count = 16; /* same as qib */
89 module_param(rcv_intr_count, uint, S_IRUGO);
90 MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count");
91
92 ushort link_crc_mask = SUPPORTED_CRCS;
93 module_param(link_crc_mask, ushort, S_IRUGO);
94 MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link");
95
96 uint loopback;
97 module_param_named(loopback, loopback, uint, S_IRUGO);
98 MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable)");
99
100 /* Other driver tunables */
101 uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation */
102 static ushort crc_14b_sideband = 1;
103 static uint use_flr = 1;
104 uint quick_linkup; /* skip LNI */
105
106 struct flag_table {
107         u64 flag;       /* the flag */
108         char *str;      /* description string */
109         u16 extra;      /* extra information */
110         u16 unused0;
111         u32 unused1;
112 };
113
114 /* str must be a string constant */
115 #define FLAG_ENTRY(str, extra, flag) {flag, str, extra}
116 #define FLAG_ENTRY0(str, flag) {flag, str, 0}
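/*
 * Editor's illustration (not in the original source): each helper builds
 * one struct flag_table initializer.  For example,
 *
 *   FLAG_ENTRY0("CceCsrParityErr", CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK)
 *
 * expands to
 *
 *   { CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK, "CceCsrParityErr", 0 }
 *
 * i.e. { .flag, .str, .extra }, with .extra available to carry consequence
 * bits such as the SEC_* values defined below.
 */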
117
118 /* Send Error Consequences */
119 #define SEC_WRITE_DROPPED       0x1
120 #define SEC_PACKET_DROPPED      0x2
121 #define SEC_SC_HALTED           0x4     /* per-context only */
122 #define SEC_SPC_FREEZE          0x8     /* per-HFI only */
123
124 #define VL15CTXT                  1
125 #define MIN_KERNEL_KCTXTS         2
126 #define NUM_MAP_REGS             32
127
128 /* Bit offset into the GUID which carries HFI id information */
129 #define GUID_HFI_INDEX_SHIFT     39
130
131 /* extract the emulation revision */
132 #define emulator_rev(dd) ((dd)->irev >> 8)
133 /* parallel and serial emulation versions are 3 and 4 respectively */
134 #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
135 #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
136
137 /* RSM fields */
138
139 /* packet type */
140 #define IB_PACKET_TYPE         2ull
141 #define QW_SHIFT               6ull
142 /* QPN[7..1] */
143 #define QPN_WIDTH              7ull
144
145 /* LRH.BTH: QW 0, OFFSET 48 - for match */
146 #define LRH_BTH_QW             0ull
147 #define LRH_BTH_BIT_OFFSET     48ull
148 #define LRH_BTH_OFFSET(off)    ((LRH_BTH_QW << QW_SHIFT) | (off))
149 #define LRH_BTH_MATCH_OFFSET   LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
150 #define LRH_BTH_SELECT
151 #define LRH_BTH_MASK           3ull
152 #define LRH_BTH_VALUE          2ull
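/*
 * Worked example (editor's note): RSM match/select offsets pack the
 * quad-word index in the upper bits and the bit offset within that QW in
 * the lower bits, i.e. offset = (qw << QW_SHIFT) | bit_offset.  For the
 * LRH.BTH match above, LRH_BTH_MATCH_OFFSET = (0 << 6) | 48 = 48: the
 * 2-bit packet type field at QW 0, bit 48 is compared against
 * LRH_BTH_VALUE under LRH_BTH_MASK.
 */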
153
154 /* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
155 #define LRH_SC_QW              0ull
156 #define LRH_SC_BIT_OFFSET      56ull
157 #define LRH_SC_OFFSET(off)     ((LRH_SC_QW << QW_SHIFT) | (off))
158 #define LRH_SC_MATCH_OFFSET    LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
159 #define LRH_SC_MASK            128ull
160 #define LRH_SC_VALUE           0ull
161
162 /* SC[n..0] QW 0, OFFSET 60 - for select */
163 #define LRH_SC_SELECT_OFFSET  ((LRH_SC_QW << QW_SHIFT) | (60ull))
164
165 /* QPN[m+n:1] QW 1, OFFSET 1 */
166 #define QPN_SELECT_OFFSET      ((1ull << QW_SHIFT) | (1ull))
167
168 /* defines to build power on SC2VL table */
169 #define SC2VL_VAL( \
170         num, \
171         sc0, sc0val, \
172         sc1, sc1val, \
173         sc2, sc2val, \
174         sc3, sc3val, \
175         sc4, sc4val, \
176         sc5, sc5val, \
177         sc6, sc6val, \
178         sc7, sc7val) \
179 ( \
180         ((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
181         ((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
182         ((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
183         ((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
184         ((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
185         ((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
186         ((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
187         ((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT)   \
188 )
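/*
 * Hypothetical example (illustration only, not taken from the driver):
 * programming the first SendSC2VLt CSR with a one-to-one SC0..SC7 to
 * VL0..VL7 mapping could be written as
 *
 *   SC2VL_VAL(0,
 *             0, 0, 1, 1, 2, 2, 3, 3,
 *             4, 4, 5, 5, 6, 6, 7, 7)
 *
 * which ORs each VL value into its SEND_SC2VLT0_SC<n>_SHIFT position.
 */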
189
190 #define DC_SC_VL_VAL( \
191         range, \
192         e0, e0val, \
193         e1, e1val, \
194         e2, e2val, \
195         e3, e3val, \
196         e4, e4val, \
197         e5, e5val, \
198         e6, e6val, \
199         e7, e7val, \
200         e8, e8val, \
201         e9, e9val, \
202         e10, e10val, \
203         e11, e11val, \
204         e12, e12val, \
205         e13, e13val, \
206         e14, e14val, \
207         e15, e15val) \
208 ( \
209         ((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
210         ((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
211         ((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
212         ((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
213         ((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
214         ((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
215         ((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
216         ((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
217         ((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
218         ((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
219         ((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
220         ((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
221         ((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
222         ((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
223         ((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
224         ((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
225 )
226
227 /* all CceStatus sub-block freeze bits */
228 #define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
229                         | CCE_STATUS_RXE_FROZE_SMASK \
230                         | CCE_STATUS_TXE_FROZE_SMASK \
231                         | CCE_STATUS_TXE_PIO_FROZE_SMASK)
232 /* all CceStatus sub-block TXE pause bits */
233 #define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
234                         | CCE_STATUS_TXE_PAUSED_SMASK \
235                         | CCE_STATUS_SDMA_PAUSED_SMASK)
236 /* all CceStatus sub-block RXE pause bits */
237 #define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK
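/*
 * Editor's note (assumed usage, not a quote of the driver): these grouped
 * masks are meant to be tested against a CceStatus read while waiting for
 * the chip to enter or leave a freeze/pause state, e.g.
 *
 *   (read_csr(dd, CCE_STATUS) & ALL_FROZE) == ALL_FROZE
 */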
238
239 /*
240  * CCE Error flags.
241  */
242 static struct flag_table cce_err_status_flags[] = {
243 /* 0*/  FLAG_ENTRY0("CceCsrParityErr",
244                 CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
245 /* 1*/  FLAG_ENTRY0("CceCsrReadBadAddrErr",
246                 CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK),
247 /* 2*/  FLAG_ENTRY0("CceCsrWriteBadAddrErr",
248                 CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK),
249 /* 3*/  FLAG_ENTRY0("CceTrgtAsyncFifoParityErr",
250                 CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK),
251 /* 4*/  FLAG_ENTRY0("CceTrgtAccessErr",
252                 CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK),
253 /* 5*/  FLAG_ENTRY0("CceRspdDataParityErr",
254                 CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK),
255 /* 6*/  FLAG_ENTRY0("CceCli0AsyncFifoParityErr",
256                 CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK),
257 /* 7*/  FLAG_ENTRY0("CceCsrCfgBusParityErr",
258                 CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK),
259 /* 8*/  FLAG_ENTRY0("CceCli2AsyncFifoParityErr",
260                 CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK),
261 /* 9*/  FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
262             CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK),
263 /*10*/  FLAG_ENTRY0("CceCli1AsyncFifoSdmaHdParityErr",
264             CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK),
265 /*11*/  FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError",
266             CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK),
267 /*12*/  FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError",
268                 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK),
269 /*13*/  FLAG_ENTRY0("PcicRetryMemCorErr",
270                 CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK),
271 /*14*/  FLAG_ENTRY0("PcicRetrySotMemCorErr",
272                 CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK),
273 /*15*/  FLAG_ENTRY0("PcicPostHdQCorErr",
274                 CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK),
275 /*16*/  FLAG_ENTRY0("PcicPostDatQCorErr",
276                 CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK),
277 /*17*/  FLAG_ENTRY0("PcicCplHdQCorErr",
278                 CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK),
279 /*18*/  FLAG_ENTRY0("PcicCplDatQCorErr",
280                 CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK),
281 /*19*/  FLAG_ENTRY0("PcicNPostHQParityErr",
282                 CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK),
283 /*20*/  FLAG_ENTRY0("PcicNPostDatQParityErr",
284                 CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK),
285 /*21*/  FLAG_ENTRY0("PcicRetryMemUncErr",
286                 CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK),
287 /*22*/  FLAG_ENTRY0("PcicRetrySotMemUncErr",
288                 CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK),
289 /*23*/  FLAG_ENTRY0("PcicPostHdQUncErr",
290                 CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK),
291 /*24*/  FLAG_ENTRY0("PcicPostDatQUncErr",
292                 CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK),
293 /*25*/  FLAG_ENTRY0("PcicCplHdQUncErr",
294                 CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK),
295 /*26*/  FLAG_ENTRY0("PcicCplDatQUncErr",
296                 CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK),
297 /*27*/  FLAG_ENTRY0("PcicTransmitFrontParityErr",
298                 CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK),
299 /*28*/  FLAG_ENTRY0("PcicTransmitBackParityErr",
300                 CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK),
301 /*29*/  FLAG_ENTRY0("PcicReceiveParityErr",
302                 CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK),
303 /*30*/  FLAG_ENTRY0("CceTrgtCplTimeoutErr",
304                 CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK),
305 /*31*/  FLAG_ENTRY0("LATriggered",
306                 CCE_ERR_STATUS_LA_TRIGGERED_SMASK),
307 /*32*/  FLAG_ENTRY0("CceSegReadBadAddrErr",
308                 CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK),
309 /*33*/  FLAG_ENTRY0("CceSegWriteBadAddrErr",
310                 CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK),
311 /*34*/  FLAG_ENTRY0("CceRcplAsyncFifoParityErr",
312                 CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK),
313 /*35*/  FLAG_ENTRY0("CceRxdmaConvFifoParityErr",
314                 CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK),
315 /*36*/  FLAG_ENTRY0("CceMsixTableCorErr",
316                 CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK),
317 /*37*/  FLAG_ENTRY0("CceMsixTableUncErr",
318                 CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK),
319 /*38*/  FLAG_ENTRY0("CceIntMapCorErr",
320                 CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK),
321 /*39*/  FLAG_ENTRY0("CceIntMapUncErr",
322                 CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK),
323 /*40*/  FLAG_ENTRY0("CceMsixCsrParityErr",
324                 CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK),
325 /*41-63 reserved*/
326 };
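/*
 * Editor's sketch (not part of the original file): tables like the one
 * above are intended to be scanned bit-by-bit against a just-read error
 * status value, collecting the .str of every set flag for logging,
 * roughly:
 *
 *   for (i = 0; i < ARRAY_SIZE(cce_err_status_flags); i++)
 *           if (reg & cce_err_status_flags[i].flag)
 *                   ... append cce_err_status_flags[i].str to the message ...
 */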
327
328 /*
329  * Misc Error flags
330  */
331 #define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
332 static struct flag_table misc_err_status_flags[] = {
333 /* 0*/  FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
334 /* 1*/  FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
335 /* 2*/  FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
336 /* 3*/  FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
337 /* 4*/  FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
338 /* 5*/  FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
339 /* 6*/  FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
340 /* 7*/  FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
341 /* 8*/  FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
342 /* 9*/  FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
343 /*10*/  FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
344 /*11*/  FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
345 /*12*/  FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
346 };
347
348 /*
349  * TXE PIO Error flags and consequences
350  */
351 static struct flag_table pio_err_status_flags[] = {
352 /* 0*/  FLAG_ENTRY("PioWriteBadCtxt",
353         SEC_WRITE_DROPPED,
354         SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
355 /* 1*/  FLAG_ENTRY("PioWriteAddrParity",
356         SEC_SPC_FREEZE,
357         SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
358 /* 2*/  FLAG_ENTRY("PioCsrParity",
359         SEC_SPC_FREEZE,
360         SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
361 /* 3*/  FLAG_ENTRY("PioSbMemFifo0",
362         SEC_SPC_FREEZE,
363         SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
364 /* 4*/  FLAG_ENTRY("PioSbMemFifo1",
365         SEC_SPC_FREEZE,
366         SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
367 /* 5*/  FLAG_ENTRY("PioPccFifoParity",
368         SEC_SPC_FREEZE,
369         SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
370 /* 6*/  FLAG_ENTRY("PioPecFifoParity",
371         SEC_SPC_FREEZE,
372         SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
373 /* 7*/  FLAG_ENTRY("PioSbrdctlCrrelParity",
374         SEC_SPC_FREEZE,
375         SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
376 /* 8*/  FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
377         SEC_SPC_FREEZE,
378         SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
379 /* 9*/  FLAG_ENTRY("PioPktEvictFifoParityErr",
380         SEC_SPC_FREEZE,
381         SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
382 /*10*/  FLAG_ENTRY("PioSmPktResetParity",
383         SEC_SPC_FREEZE,
384         SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
385 /*11*/  FLAG_ENTRY("PioVlLenMemBank0Unc",
386         SEC_SPC_FREEZE,
387         SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
388 /*12*/  FLAG_ENTRY("PioVlLenMemBank1Unc",
389         SEC_SPC_FREEZE,
390         SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
391 /*13*/  FLAG_ENTRY("PioVlLenMemBank0Cor",
392         0,
393         SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
394 /*14*/  FLAG_ENTRY("PioVlLenMemBank1Cor",
395         0,
396         SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
397 /*15*/  FLAG_ENTRY("PioCreditRetFifoParity",
398         SEC_SPC_FREEZE,
399         SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
400 /*16*/  FLAG_ENTRY("PioPpmcPblFifo",
401         SEC_SPC_FREEZE,
402         SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
403 /*17*/  FLAG_ENTRY("PioInitSmIn",
404         0,
405         SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
406 /*18*/  FLAG_ENTRY("PioPktEvictSmOrArbSm",
407         SEC_SPC_FREEZE,
408         SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
409 /*19*/  FLAG_ENTRY("PioHostAddrMemUnc",
410         SEC_SPC_FREEZE,
411         SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
412 /*20*/  FLAG_ENTRY("PioHostAddrMemCor",
413         0,
414         SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
415 /*21*/  FLAG_ENTRY("PioWriteDataParity",
416         SEC_SPC_FREEZE,
417         SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
418 /*22*/  FLAG_ENTRY("PioStateMachine",
419         SEC_SPC_FREEZE,
420         SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
421 /*23*/  FLAG_ENTRY("PioWriteQwValidParity",
422         SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
423         SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
424 /*24*/  FLAG_ENTRY("PioBlockQwCountParity",
425         SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
426         SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
427 /*25*/  FLAG_ENTRY("PioVlfVlLenParity",
428         SEC_SPC_FREEZE,
429         SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
430 /*26*/  FLAG_ENTRY("PioVlfSopParity",
431         SEC_SPC_FREEZE,
432         SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
433 /*27*/  FLAG_ENTRY("PioVlFifoParity",
434         SEC_SPC_FREEZE,
435         SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
436 /*28*/  FLAG_ENTRY("PioPpmcBqcMemParity",
437         SEC_SPC_FREEZE,
438         SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
439 /*29*/  FLAG_ENTRY("PioPpmcSopLen",
440         SEC_SPC_FREEZE,
441         SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
442 /*30-31 reserved*/
443 /*32*/  FLAG_ENTRY("PioCurrentFreeCntParity",
444         SEC_SPC_FREEZE,
445         SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
446 /*33*/  FLAG_ENTRY("PioLastReturnedCntParity",
447         SEC_SPC_FREEZE,
448         SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
449 /*34*/  FLAG_ENTRY("PioPccSopHeadParity",
450         SEC_SPC_FREEZE,
451         SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
452 /*35*/  FLAG_ENTRY("PioPecSopHeadParityErr",
453         SEC_SPC_FREEZE,
454         SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
455 /*36-63 reserved*/
456 };
457
458 /* TXE PIO errors that cause an SPC freeze */
459 #define ALL_PIO_FREEZE_ERR \
460         (SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
461         | SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
462         | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
463         | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
464         | SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
465         | SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
466         | SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
467         | SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
468         | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
469         | SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
470         | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
471         | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
472         | SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
473         | SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
474         | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
475         | SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
476         | SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
477         | SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
478         | SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
479         | SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
480         | SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
481         | SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
482         | SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
483         | SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
484         | SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
485         | SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
486         | SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
487         | SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
488         | SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)
489
490 /*
491  * TXE SDMA Error flags
492  */
493 static struct flag_table sdma_err_status_flags[] = {
494 /* 0*/  FLAG_ENTRY0("SDmaRpyTagErr",
495                 SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
496 /* 1*/  FLAG_ENTRY0("SDmaCsrParityErr",
497                 SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
498 /* 2*/  FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
499                 SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
500 /* 3*/  FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
501                 SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
502 /*04-63 reserved*/
503 };
504
505 /* TXE SDMA errors that cause an SPC freeze */
506 #define ALL_SDMA_FREEZE_ERR  \
507                 (SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
508                 | SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
509                 | SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
510
511 /*
512  * TXE Egress Error flags
513  */
514 #define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
515 static struct flag_table egress_err_status_flags[] = {
516 /* 0*/  FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
517 /* 1*/  FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
518 /* 2 reserved */
519 /* 3*/  FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
520                 SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
521 /* 4*/  FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
522 /* 5*/  FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
523 /* 6 reserved */
524 /* 7*/  FLAG_ENTRY0("TxPioLaunchIntfParityErr",
525                 SEES(TX_PIO_LAUNCH_INTF_PARITY)),
526 /* 8*/  FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
527                 SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
528 /* 9-10 reserved */
529 /*11*/  FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr",
530                 SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)),
531 /*12*/  FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)),
532 /*13*/  FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)),
533 /*14*/  FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)),
534 /*15*/  FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)),
535 /*16*/  FLAG_ENTRY0("TxSdma0DisallowedPacketErr",
536                 SEES(TX_SDMA0_DISALLOWED_PACKET)),
537 /*17*/  FLAG_ENTRY0("TxSdma1DisallowedPacketErr",
538                 SEES(TX_SDMA1_DISALLOWED_PACKET)),
539 /*18*/  FLAG_ENTRY0("TxSdma2DisallowedPacketErr",
540                 SEES(TX_SDMA2_DISALLOWED_PACKET)),
541 /*19*/  FLAG_ENTRY0("TxSdma3DisallowedPacketErr",
542                 SEES(TX_SDMA3_DISALLOWED_PACKET)),
543 /*20*/  FLAG_ENTRY0("TxSdma4DisallowedPacketErr",
544                 SEES(TX_SDMA4_DISALLOWED_PACKET)),
545 /*21*/  FLAG_ENTRY0("TxSdma5DisallowedPacketErr",
546                 SEES(TX_SDMA5_DISALLOWED_PACKET)),
547 /*22*/  FLAG_ENTRY0("TxSdma6DisallowedPacketErr",
548                 SEES(TX_SDMA6_DISALLOWED_PACKET)),
549 /*23*/  FLAG_ENTRY0("TxSdma7DisallowedPacketErr",
550                 SEES(TX_SDMA7_DISALLOWED_PACKET)),
551 /*24*/  FLAG_ENTRY0("TxSdma8DisallowedPacketErr",
552                 SEES(TX_SDMA8_DISALLOWED_PACKET)),
553 /*25*/  FLAG_ENTRY0("TxSdma9DisallowedPacketErr",
554                 SEES(TX_SDMA9_DISALLOWED_PACKET)),
555 /*26*/  FLAG_ENTRY0("TxSdma10DisallowedPacketErr",
556                 SEES(TX_SDMA10_DISALLOWED_PACKET)),
557 /*27*/  FLAG_ENTRY0("TxSdma11DisallowedPacketErr",
558                 SEES(TX_SDMA11_DISALLOWED_PACKET)),
559 /*28*/  FLAG_ENTRY0("TxSdma12DisallowedPacketErr",
560                 SEES(TX_SDMA12_DISALLOWED_PACKET)),
561 /*29*/  FLAG_ENTRY0("TxSdma13DisallowedPacketErr",
562                 SEES(TX_SDMA13_DISALLOWED_PACKET)),
563 /*30*/  FLAG_ENTRY0("TxSdma14DisallowedPacketErr",
564                 SEES(TX_SDMA14_DISALLOWED_PACKET)),
565 /*31*/  FLAG_ENTRY0("TxSdma15DisallowedPacketErr",
566                 SEES(TX_SDMA15_DISALLOWED_PACKET)),
567 /*32*/  FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr",
568                 SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)),
569 /*33*/  FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr",
570                 SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)),
571 /*34*/  FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr",
572                 SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)),
573 /*35*/  FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr",
574                 SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)),
575 /*36*/  FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr",
576                 SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)),
577 /*37*/  FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr",
578                 SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)),
579 /*38*/  FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr",
580                 SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)),
581 /*39*/  FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr",
582                 SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)),
583 /*40*/  FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr",
584                 SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)),
585 /*41*/  FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)),
586 /*42*/  FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)),
587 /*43*/  FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)),
588 /*44*/  FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)),
589 /*45*/  FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)),
590 /*46*/  FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)),
591 /*47*/  FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)),
592 /*48*/  FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)),
593 /*49*/  FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)),
594 /*50*/  FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)),
595 /*51*/  FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)),
596 /*52*/  FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)),
597 /*53*/  FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)),
598 /*54*/  FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)),
599 /*55*/  FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)),
600 /*56*/  FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)),
601 /*57*/  FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)),
602 /*58*/  FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)),
603 /*59*/  FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)),
604 /*60*/  FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)),
605 /*61*/  FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)),
606 /*62*/  FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr",
607                 SEES(TX_READ_SDMA_MEMORY_CSR_UNC)),
608 /*63*/  FLAG_ENTRY0("TxReadPioMemoryCsrUncErr",
609                 SEES(TX_READ_PIO_MEMORY_CSR_UNC)),
610 };
611
612 /*
613  * TXE Egress Error Info flags
614  */
615 #define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
616 static struct flag_table egress_err_info_flags[] = {
617 /* 0*/  FLAG_ENTRY0("Reserved", 0ull),
618 /* 1*/  FLAG_ENTRY0("VLErr", SEEI(VL)),
619 /* 2*/  FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
620 /* 3*/  FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
621 /* 4*/  FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)),
622 /* 5*/  FLAG_ENTRY0("SLIDErr", SEEI(SLID)),
623 /* 6*/  FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)),
624 /* 7*/  FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)),
625 /* 8*/  FLAG_ENTRY0("RawErr", SEEI(RAW)),
626 /* 9*/  FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)),
627 /*10*/  FLAG_ENTRY0("GRHErr", SEEI(GRH)),
628 /*11*/  FLAG_ENTRY0("BypassErr", SEEI(BYPASS)),
629 /*12*/  FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)),
630 /*13*/  FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)),
631 /*14*/  FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)),
632 /*15*/  FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)),
633 /*16*/  FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)),
634 /*17*/  FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)),
635 /*18*/  FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)),
636 /*19*/  FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)),
637 /*20*/  FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)),
638 /*21*/  FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)),
639 };
640
641 /* TXE Egress errors that cause an SPC freeze */
642 #define ALL_TXE_EGRESS_FREEZE_ERR \
643         (SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \
644         | SEES(TX_PIO_LAUNCH_INTF_PARITY) \
645         | SEES(TX_SDMA_LAUNCH_INTF_PARITY) \
646         | SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \
647         | SEES(TX_LAUNCH_CSR_PARITY) \
648         | SEES(TX_SBRD_CTL_CSR_PARITY) \
649         | SEES(TX_CONFIG_PARITY) \
650         | SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \
651         | SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \
652         | SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \
653         | SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \
654         | SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \
655         | SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \
656         | SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \
657         | SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \
658         | SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \
659         | SEES(TX_CREDIT_RETURN_PARITY))
660
661 /*
662  * TXE Send error flags
663  */
664 #define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
665 static struct flag_table send_err_status_flags[] = {
666 /* 0*/  FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
667 /* 1*/  FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
668 /* 2*/  FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
669 };
670
671 /*
672  * TXE Send Context Error flags and consequences
673  */
674 static struct flag_table sc_err_status_flags[] = {
675 /* 0*/  FLAG_ENTRY("InconsistentSop",
676                 SEC_PACKET_DROPPED | SEC_SC_HALTED,
677                 SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
678 /* 1*/  FLAG_ENTRY("DisallowedPacket",
679                 SEC_PACKET_DROPPED | SEC_SC_HALTED,
680                 SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK),
681 /* 2*/  FLAG_ENTRY("WriteCrossesBoundary",
682                 SEC_WRITE_DROPPED | SEC_SC_HALTED,
683                 SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK),
684 /* 3*/  FLAG_ENTRY("WriteOverflow",
685                 SEC_WRITE_DROPPED | SEC_SC_HALTED,
686                 SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK),
687 /* 4*/  FLAG_ENTRY("WriteOutOfBounds",
688                 SEC_WRITE_DROPPED | SEC_SC_HALTED,
689                 SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK),
690 /* 5-63 reserved*/
691 };
692
693 /*
694  * RXE Receive Error flags
695  */
696 #define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
697 static struct flag_table rxe_err_status_flags[] = {
698 /* 0*/  FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
699 /* 1*/  FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
700 /* 2*/  FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
701 /* 3*/  FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)),
702 /* 4*/  FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)),
703 /* 5*/  FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)),
704 /* 6*/  FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)),
705 /* 7*/  FLAG_ENTRY0("RxRcvQpMapTableCorErr", RXES(RCV_QP_MAP_TABLE_COR)),
706 /* 8*/  FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
707 /* 9*/  FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
708 /*10*/  FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
709 /*11*/  FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
710 /*12*/  FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
711 /*13*/  FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
712 /*14*/  FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
713 /*15*/  FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
714 /*16*/  FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
715                 RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
716 /*17*/  FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
717 /*18*/  FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
718 /*19*/  FLAG_ENTRY0("RxRbufBlockListReadUncErr",
719                 RXES(RBUF_BLOCK_LIST_READ_UNC)),
720 /*20*/  FLAG_ENTRY0("RxRbufBlockListReadCorErr",
721                 RXES(RBUF_BLOCK_LIST_READ_COR)),
722 /*21*/  FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
723                 RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
724 /*22*/  FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
725                 RXES(RBUF_CSR_QENT_CNT_PARITY)),
726 /*23*/  FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
727                 RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
728 /*24*/  FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
729                 RXES(RBUF_CSR_QVLD_BIT_PARITY)),
730 /*25*/  FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
731 /*26*/  FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
732 /*27*/  FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
733                 RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
734 /*28*/  FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
735 /*29*/  FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
736 /*30*/  FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
737 /*31*/  FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
738 /*32*/  FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
739 /*33*/  FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
740 /*34*/  FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
741 /*35*/  FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
742                 RXES(RBUF_FL_INITDONE_PARITY)),
743 /*36*/  FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
744                 RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
745 /*37*/  FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
746 /*38*/  FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
747 /*39*/  FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
748 /*40*/  FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
749                 RXES(LOOKUP_DES_PART1_UNC_COR)),
750 /*41*/  FLAG_ENTRY0("RxLookupDesPart2ParityErr",
751                 RXES(LOOKUP_DES_PART2_PARITY)),
752 /*42*/  FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
753 /*43*/  FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
754 /*44*/  FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
755 /*45*/  FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
756 /*46*/  FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
757 /*47*/  FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
758 /*48*/  FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
759 /*49*/  FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
760 /*50*/  FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
761 /*51*/  FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
762 /*52*/  FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
763 /*53*/  FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
764 /*54*/  FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
765 /*55*/  FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
766 /*56*/  FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
767 /*57*/  FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
768 /*58*/  FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
769 /*59*/  FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
770 /*60*/  FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
771 /*61*/  FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
772 /*62*/  FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
773 /*63*/  FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
774 };
775
776 /* RXE errors that will trigger an SPC freeze */
777 #define ALL_RXE_FREEZE_ERR  \
778         (RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK \
779         | RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK \
780         | RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK \
781         | RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK \
782         | RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK \
783         | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK \
784         | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK \
785         | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK \
786         | RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK \
787         | RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK \
788         | RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK \
789         | RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK \
790         | RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK \
791         | RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK \
792         | RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK \
793         | RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK \
794         | RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK \
795         | RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK \
796         | RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK \
797         | RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK \
798         | RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK \
799         | RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK \
800         | RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK \
801         | RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK \
802         | RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK \
803         | RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK \
804         | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK \
805         | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK \
806         | RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK \
807         | RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK \
808         | RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK \
809         | RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK \
810         | RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK \
811         | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK \
812         | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK \
813         | RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK \
814         | RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK \
815         | RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK \
816         | RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK \
817         | RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK \
818         | RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK \
819         | RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK \
820         | RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK \
821         | RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)
822
823 #define RXE_FREEZE_ABORT_MASK \
824         (RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
825         RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
826         RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)
827
828 /*
829  * DCC Error Flags
830  */
831 #define DCCE(name) DCC_ERR_FLG_##name##_SMASK
832 static struct flag_table dcc_err_flags[] = {
833         FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
834         FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
835         FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
836         FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
837         FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
838         FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
839         FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
840         FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
841         FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
842         FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
843         FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
844         FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
845         FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
846         FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
847         FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
848         FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
849         FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
850         FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
851         FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
852         FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
853         FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)),
854         FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)),
855         FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)),
856         FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)),
857         FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)),
858         FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)),
859         FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)),
860         FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)),
861         FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)),
862         FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)),
863         FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)),
864         FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)),
865         FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)),
866         FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)),
867         FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)),
868         FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)),
869         FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)),
870         FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)),
871         FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)),
872         FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)),
873         FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)),
874         FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)),
875         FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)),
876         FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)),
877         FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)),
878         FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)),
879 };
880
881 /*
882  * LCB error flags
883  */
884 #define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
885 static struct flag_table lcb_err_flags[] = {
886 /* 0*/  FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
887 /* 1*/  FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
888 /* 2*/  FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
889 /* 3*/  FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST",
890                 LCBE(ALL_LNS_FAILED_REINIT_TEST)),
891 /* 4*/  FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)),
892 /* 5*/  FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)),
893 /* 6*/  FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)),
894 /* 7*/  FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)),
895 /* 8*/  FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)),
896 /* 9*/  FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)),
897 /*10*/  FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)),
898 /*11*/  FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)),
899 /*12*/  FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)),
900 /*13*/  FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER",
901                 LCBE(UNEXPECTED_ROUND_TRIP_MARKER)),
902 /*14*/  FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)),
903 /*15*/  FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)),
904 /*16*/  FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)),
905 /*17*/  FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)),
906 /*18*/  FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)),
907 /*19*/  FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE",
908                 LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)),
909 /*20*/  FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)),
910 /*21*/  FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)),
911 /*22*/  FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)),
912 /*23*/  FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)),
913 /*24*/  FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)),
914 /*25*/  FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)),
915 /*26*/  FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP",
916                 LCBE(RST_FOR_INCOMPLT_RND_TRIP)),
917 /*27*/  FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)),
918 /*28*/  FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE",
919                 LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)),
920 /*29*/  FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR",
921                 LCBE(REDUNDANT_FLIT_PARITY_ERR))
922 };
923
924 /*
925  * DC8051 Error Flags
926  */
927 #define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
928 static struct flag_table dc8051_err_flags[] = {
929         FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
930         FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
931         FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
932         FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)),
933         FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)),
934         FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)),
935         FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
936         FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
937         FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
938                 D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
939         FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
940 };
941
942 /*
943  * DC8051 Information Error flags
944  *
945  * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
946  */
947 static struct flag_table dc8051_info_err_flags[] = {
948         FLAG_ENTRY0("Spico ROM check failed",  SPICO_ROM_FAILED),
949         FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
950         FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
951         FLAG_ENTRY0("Serdes internal loopback failure",
952                                         FAILED_SERDES_INTERNAL_LOOPBACK),
953         FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
954         FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
955         FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
956         FLAG_ENTRY0("Failed LNI(EstbComm)",    FAILED_LNI_ESTBCOMM),
957         FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
958         FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
959         FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
960         FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
961 };
962
963 /*
964  * DC8051 Information Host Message flags
965  *
966  * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
967  */
968 static struct flag_table dc8051_info_host_msg_flags[] = {
969         FLAG_ENTRY0("Host request done", 0x0001),
970         FLAG_ENTRY0("BC SMA message", 0x0002),
971         FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
972         FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
973         FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
974         FLAG_ENTRY0("External device config request", 0x0020),
975         FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
976         FLAG_ENTRY0("LinkUp achieved", 0x0080),
977         FLAG_ENTRY0("Link going down", 0x0100),
978 };
979
980
981 static u32 encoded_size(u32 size);
982 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
983 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
984 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
985                                u8 *continuous);
986 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
987                                   u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
988 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
989                                       u8 *remote_tx_rate, u16 *link_widths);
990 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
991                                      u8 *flag_bits, u16 *link_widths);
992 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
993                                   u8 *device_rev);
994 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
995 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
996 static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
997                             u8 *tx_polarity_inversion,
998                             u8 *rx_polarity_inversion, u8 *max_rate);
999 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
1000                                 unsigned int context, u64 err_status);
1001 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg);
1002 static void handle_dcc_err(struct hfi1_devdata *dd,
1003                            unsigned int context, u64 err_status);
1004 static void handle_lcb_err(struct hfi1_devdata *dd,
1005                            unsigned int context, u64 err_status);
1006 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg);
1007 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1008 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1009 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1010 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1011 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1012 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1013 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1014 static void set_partition_keys(struct hfi1_pportdata *);
1015 static const char *link_state_name(u32 state);
1016 static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
1017                                           u32 state);
1018 static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
1019                            u64 *out_data);
1020 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
1021 static int thermal_init(struct hfi1_devdata *dd);
1022
1023 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
1024                                   int msecs);
1025 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
1026 static void handle_temp_err(struct hfi1_devdata *);
1027 static void dc_shutdown(struct hfi1_devdata *);
1028 static void dc_start(struct hfi1_devdata *);
1029
1030 /*
1031  * Error interrupt table entry.  This is used as input to the interrupt
1032  * "clear down" routine used for all second tier error interrupt registers.
1033  * Second tier interrupt registers have a single bit representing them
1034  * in the top-level CceIntStatus.
1035  */
1036 struct err_reg_info {
1037         u32 status;             /* status CSR offset */
1038         u32 clear;              /* clear CSR offset */
1039         u32 mask;               /* mask CSR offset */
1040         void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg);
1041         const char *desc;
1042 };
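/*
 * Editor's note (assumed flow, not a quote of the driver): a generic
 * "clear down" pass over one of these entries reads the CSR at eri->status,
 * writes the value back to the CSR at eri->clear to acknowledge it, and
 * then calls eri->handler(dd, source, reg) to decode the individual bits.
 */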
1043
1044 #define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
1045 #define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
1046 #define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
1047
1048 /*
1049  * Helpers for building HFI and DC error interrupt table entries.  Different
1050  * helpers are needed because of inconsistent register names.
1051  */
1052 #define EE(reg, handler, desc) \
1053         { reg##_STATUS, reg##_CLEAR, reg##_MASK, \
1054                 handler, desc }
1055 #define DC_EE1(reg, handler, desc) \
1056         { reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc }
1057 #define DC_EE2(reg, handler, desc) \
1058         { reg##_FLG, reg##_CLR, reg##_EN, handler, desc }
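/*
 * Expansion example (derivable from the helpers above):
 *
 *   EE(CCE_ERR, handle_cce_err, "CceErr")
 *
 * becomes
 *
 *   { CCE_ERR_STATUS, CCE_ERR_CLEAR, CCE_ERR_MASK, handle_cce_err, "CceErr" }
 *
 * while DC_EE1/DC_EE2 cover DC blocks whose CSR names use the
 * _FLG/_FLG_CLR/_FLG_EN and _FLG/_CLR/_EN suffixes respectively.
 */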
1059
1060 /*
1061  * Table of the "misc" grouping of error interrupts.  Each entry refers to
1062  * another register containing more information.
1063  */
1064 static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = {
1065 /* 0*/  EE(CCE_ERR,             handle_cce_err,    "CceErr"),
1066 /* 1*/  EE(RCV_ERR,             handle_rxe_err,    "RxeErr"),
1067 /* 2*/  EE(MISC_ERR,    handle_misc_err,   "MiscErr"),
1068 /* 3*/  { 0, 0, 0, NULL }, /* reserved */
1069 /* 4*/  EE(SEND_PIO_ERR,    handle_pio_err,    "PioErr"),
1070 /* 5*/  EE(SEND_DMA_ERR,    handle_sdma_err,   "SDmaErr"),
1071 /* 6*/  EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"),
1072 /* 7*/  EE(SEND_ERR,    handle_txe_err,    "TxeErr")
1073         /* the rest are reserved */
1074 };
1075
1076 /*
1077  * Index into the Various section of the interrupt sources
1078  * corresponding to the Critical Temperature interrupt.
1079  */
1080 #define TCRIT_INT_SOURCE 4
1081
1082 /*
1083  * SDMA error interrupt entry - refers to another register containing more
1084  * information.
1085  */
1086 static const struct err_reg_info sdma_eng_err =
1087         EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr");
1088
1089 static const struct err_reg_info various_err[NUM_VARIOUS] = {
1090 /* 0*/  { 0, 0, 0, NULL }, /* PbcInt */
1091 /* 1*/  { 0, 0, 0, NULL }, /* GpioAssertInt */
1092 /* 2*/  EE(ASIC_QSFP1,  handle_qsfp_int,        "QSFP1"),
1093 /* 3*/  EE(ASIC_QSFP2,  handle_qsfp_int,        "QSFP2"),
1094 /* 4*/  { 0, 0, 0, NULL }, /* TCritInt */
1095         /* rest are reserved */
1096 };
1097
1098 /*
1099  * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG
1100  * register can not be derived from the MTU value because 10K is not
1101  * a power of 2. Therefore, we need a constant. Everything else can
1102  * be calculated.
1103  */
1104 #define DCC_CFG_PORT_MTU_CAP_10240 7
1105
1106 /*
1107  * Table of the DC grouping of error interrupts.  Each entry refers to
1108  * another register containing more information.
1109  */
1110 static const struct err_reg_info dc_errs[NUM_DC_ERRS] = {
1111 /* 0*/  DC_EE1(DCC_ERR,         handle_dcc_err,        "DCC Err"),
1112 /* 1*/  DC_EE2(DC_LCB_ERR,      handle_lcb_err,        "LCB Err"),
1113 /* 2*/  DC_EE2(DC_DC8051_ERR,   handle_8051_interrupt, "DC8051 Interrupt"),
1114 /* 3*/  /* dc_lbm_int - special, see is_dc_int() */
1115         /* the rest are reserved */
1116 };
1117
1118 struct cntr_entry {
1119         /*
1120          * counter name
1121          */
1122         char *name;
1123
1124         /*
1125          * csr to read for name (if applicable)
1126          */
1127         u64 csr;
1128
1129         /*
1130          * offset into dd or ppd to store the counter's value
1131          */
1132         int offset;
1133
1134         /*
1135          * flags
1136          */
1137         u8 flags;
1138
1139         /*
1140          * accessor for stat element, context either dd or ppd
1141          */
1142         u64 (*rw_cntr)(const struct cntr_entry *,
1143                                void *context,
1144                                int vl,
1145                                int mode,
1146                                u64 data);
1147 };
1148
1149 #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
1150 #define C_RCV_HDR_OVF_LAST C_RCV_HDR_OVF_159
1151
1152 #define CNTR_ELEM(name, csr, offset, flags, accessor) \
1153 { \
1154         name, \
1155         csr, \
1156         offset, \
1157         flags, \
1158         accessor \
1159 }
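/*
 * CNTR_ELEM() is a positional initializer for struct cntr_entry above:
 * name, csr, offset, flags, accessor.  For example,
 * CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL, access_sw_cpu_intr) defines a
 * purely software counter with no CSR behind it.
 */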
1160
1161 /* 32bit RXE */
1162 #define RXE32_PORT_CNTR_ELEM(name, counter, flags) \
1163 CNTR_ELEM(#name, \
1164           (counter * 8 + RCV_COUNTER_ARRAY32), \
1165           0, flags | CNTR_32BIT, \
1166           port_access_u32_csr)
1167
1168 #define RXE32_DEV_CNTR_ELEM(name, counter, flags) \
1169 CNTR_ELEM(#name, \
1170           (counter * 8 + RCV_COUNTER_ARRAY32), \
1171           0, flags | CNTR_32BIT, \
1172           dev_access_u32_csr)
1173
1174 /* 64bit RXE */
1175 #define RXE64_PORT_CNTR_ELEM(name, counter, flags) \
1176 CNTR_ELEM(#name, \
1177           (counter * 8 + RCV_COUNTER_ARRAY64), \
1178           0, flags, \
1179           port_access_u64_csr)
1180
1181 #define RXE64_DEV_CNTR_ELEM(name, counter, flags) \
1182 CNTR_ELEM(#name, \
1183           (counter * 8 + RCV_COUNTER_ARRAY64), \
1184           0, flags, \
1185           dev_access_u64_csr)
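/*
 * The RXE/TXE/CCE counter macros address one 64-bit CSR slot per counter,
 * hence the "counter * 8" byte offset from the array base.  For example,
 * RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH) reads
 * RCV_COUNTER_ARRAY32 + RCV_BUF_OVFL_CNT * 8 through dev_access_u32_csr()
 * with CNTR_32BIT set.
 */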
1186
1187 #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
1188 #define OVR_ELM(ctx) \
1189 CNTR_ELEM("RcvHdrOvr" #ctx, \
1190           (RCV_HDR_OVFL_CNT + ctx*0x100), \
1191           0, CNTR_NORMAL, port_access_u64_csr)
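/*
 * OVR_ELM(ctx) builds the per-receive-context "RcvHdrOvr<ctx>" counter,
 * reading RCV_HDR_OVFL_CNT + ctx * 0x100 (the per-context CSR stride
 * appears to be 0x100 bytes).  OVR_LBL(ctx) pastes the matching
 * C_RCV_HDR_OVF_<ctx> index used in the port counter table below.
 */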
1192
1193 /* 32bit TXE */
1194 #define TXE32_PORT_CNTR_ELEM(name, counter, flags) \
1195 CNTR_ELEM(#name, \
1196           (counter * 8 + SEND_COUNTER_ARRAY32), \
1197           0, flags | CNTR_32BIT, \
1198           port_access_u32_csr)
1199
1200 /* 64bit TXE */
1201 #define TXE64_PORT_CNTR_ELEM(name, counter, flags) \
1202 CNTR_ELEM(#name, \
1203           (counter * 8 + SEND_COUNTER_ARRAY64), \
1204           0, flags, \
1205           port_access_u64_csr)
1206
1207 #define TX64_DEV_CNTR_ELEM(name, counter, flags) \
1208 CNTR_ELEM(#name,\
1209           counter * 8 + SEND_COUNTER_ARRAY64, \
1210           0, \
1211           flags, \
1212           dev_access_u64_csr)
1213
1214 /* CCE */
1215 #define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \
1216 CNTR_ELEM(#name, \
1217           (counter * 8 + CCE_COUNTER_ARRAY32), \
1218           0, flags | CNTR_32BIT, \
1219           dev_access_u32_csr)
1220
1221 #define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \
1222 CNTR_ELEM(#name, \
1223           (counter * 8 + CCE_INT_COUNTER_ARRAY32), \
1224           0, flags | CNTR_32BIT, \
1225           dev_access_u32_csr)
1226
1227 /* DC */
1228 #define DC_PERF_CNTR(name, counter, flags) \
1229 CNTR_ELEM(#name, \
1230           counter, \
1231           0, \
1232           flags, \
1233           dev_access_u64_csr)
1234
1235 #define DC_PERF_CNTR_LCB(name, counter, flags) \
1236 CNTR_ELEM(#name, \
1237           counter, \
1238           0, \
1239           flags, \
1240           dc_access_lcb_cntr)
1241
1242 /* ibp counters */
1243 #define SW_IBP_CNTR(name, cntr) \
1244 CNTR_ELEM(#name, \
1245           0, \
1246           0, \
1247           CNTR_SYNTH, \
1248           access_ibp_##cntr)
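/*
 * Example: SW_IBP_CNTR(RnrNak, rnr_naks) expands to
 * CNTR_ELEM("RnrNak", 0, 0, CNTR_SYNTH, access_ibp_rnr_naks) - a software
 * counter backed by ppd->ibport_data.n_rnr_naks rather than a CSR.
 */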
1249
1250 u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
1251 {
1252         u64 val;
1253
1254         if (dd->flags & HFI1_PRESENT) {
1255                 val = readq((void __iomem *)dd->kregbase + offset);
1256                 return val;
1257         }
1258         return -1;
1259 }
1260
1261 void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
1262 {
1263         if (dd->flags & HFI1_PRESENT)
1264                 writeq(value, (void __iomem *)dd->kregbase + offset);
1265 }
1266
1267 void __iomem *get_csr_addr(
1268         struct hfi1_devdata *dd,
1269         u32 offset)
1270 {
1271         return (void __iomem *)dd->kregbase + offset;
1272 }
1273
1274 static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
1275                                  int mode, u64 value)
1276 {
1277         u64 ret;
1278
1280         if (mode == CNTR_MODE_R) {
1281                 ret = read_csr(dd, csr);
1282         } else if (mode == CNTR_MODE_W) {
1283                 write_csr(dd, csr, value);
1284                 ret = value;
1285         } else {
1286                 dd_dev_err(dd, "Invalid cntr register access mode");
1287                 return 0;
1288         }
1289
1290         hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode);
1291         return ret;
1292 }
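/*
 * Example use: read_write_csr(dd, csr, CNTR_MODE_R, 0) returns the current
 * CSR value; CNTR_MODE_W writes "value" and echoes it back.  Any other
 * mode logs an error and returns 0.
 */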
1293
1294 /* Dev Access */
1295 static u64 dev_access_u32_csr(const struct cntr_entry *entry,
1296                             void *context, int vl, int mode, u64 data)
1297 {
1298         struct hfi1_devdata *dd = context;
1299
1300         if (vl != CNTR_INVALID_VL)
1301                 return 0;
1302         return read_write_csr(dd, entry->csr, mode, data);
1303 }
1304
1305 static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
1306                             int vl, int mode, u64 data)
1307 {
1308         struct hfi1_devdata *dd = context;
1309
1311         u64 csr = entry->csr;
1312
1313         if (entry->flags & CNTR_VL) {
1314                 if (vl == CNTR_INVALID_VL)
1315                         return 0;
1316                 csr += 8 * vl;
1317         } else {
1318                 if (vl != CNTR_INVALID_VL)
1319                         return 0;
1320         }
1321
1322         val = read_write_csr(dd, csr, mode, data);
1323         return val;
1324 }
1325
1326 static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
1327                             int vl, int mode, u64 data)
1328 {
1329         struct hfi1_devdata *dd = context;
1330         u32 csr = entry->csr;
1331         int ret = 0;
1332
1333         if (vl != CNTR_INVALID_VL)
1334                 return 0;
1335         if (mode == CNTR_MODE_R)
1336                 ret = read_lcb_csr(dd, csr, &data);
1337         else if (mode == CNTR_MODE_W)
1338                 ret = write_lcb_csr(dd, csr, data);
1339
1340         if (ret) {
1341                 dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
1342                 return 0;
1343         }
1344
1345         hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode);
1346         return data;
1347 }
1348
1349 /* Port Access */
1350 static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
1351                              int vl, int mode, u64 data)
1352 {
1353         struct hfi1_pportdata *ppd = context;
1354
1355         if (vl != CNTR_INVALID_VL)
1356                 return 0;
1357         return read_write_csr(ppd->dd, entry->csr, mode, data);
1358 }
1359
1360 static u64 port_access_u64_csr(const struct cntr_entry *entry,
1361                              void *context, int vl, int mode, u64 data)
1362 {
1363         struct hfi1_pportdata *ppd = context;
1364         u64 val;
1365         u64 csr = entry->csr;
1366
1367         if (entry->flags & CNTR_VL) {
1368                 if (vl == CNTR_INVALID_VL)
1369                         return 0;
1370                 csr += 8 * vl;
1371         } else {
1372                 if (vl != CNTR_INVALID_VL)
1373                         return 0;
1374         }
1375         val = read_write_csr(ppd->dd, csr, mode, data);
1376         return val;
1377 }
1378
1379 /* Software defined */
1380 static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
1381                                 u64 data)
1382 {
1383         u64 ret;
1384
1385         if (mode == CNTR_MODE_R) {
1386                 ret = *cntr;
1387         } else if (mode == CNTR_MODE_W) {
1388                 *cntr = data;
1389                 ret = data;
1390         } else {
1391                 dd_dev_err(dd, "Invalid cntr sw access mode");
1392                 return 0;
1393         }
1394
1395         hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode);
1396
1397         return ret;
1398 }
1399
1400 static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
1401                                int vl, int mode, u64 data)
1402 {
1403         struct hfi1_pportdata *ppd = context;
1404
1405         if (vl != CNTR_INVALID_VL)
1406                 return 0;
1407         return read_write_sw(ppd->dd, &ppd->link_downed, mode, data);
1408 }
1409
1410 static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
1411                                int vl, int mode, u64 data)
1412 {
1413         struct hfi1_pportdata *ppd = context;
1414
1415         if (vl != CNTR_INVALID_VL)
1416                 return 0;
1417         return read_write_sw(ppd->dd, &ppd->link_up, mode, data);
1418 }
1419
1420 static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
1421                                     void *context, int vl, int mode, u64 data)
1422 {
1423         struct hfi1_pportdata *ppd = context;
1424
1425         if (vl != CNTR_INVALID_VL)
1426                 return 0;
1427
1428         return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
1429 }
1430
1431 static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
1432                                      void *context, int vl, int mode, u64 data)
1433 {
1434         struct hfi1_pportdata *ppd = context;
1435
1436         if (vl != CNTR_INVALID_VL)
1437                 return 0;
1438
1439         return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors,
1440                              mode, data);
1441 }
1442
1443 static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
1444                                      void *context, int vl, int mode, u64 data)
1445 {
1446         struct hfi1_pportdata *ppd = context;
1447
1448         if (vl != CNTR_INVALID_VL)
1449                 return 0;
1450
1451         return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors,
1452                              mode, data);
1453 }
1454
1455 u64 get_all_cpu_total(u64 __percpu *cntr)
1456 {
1457         int cpu;
1458         u64 counter = 0;
1459
1460         for_each_possible_cpu(cpu)
1461                 counter += *per_cpu_ptr(cntr, cpu);
1462         return counter;
1463 }
1464
1465 static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
1466                           u64 __percpu *cntr,
1467                           int vl, int mode, u64 data)
1468 {
1470         u64 ret = 0;
1471
1472         if (vl != CNTR_INVALID_VL)
1473                 return 0;
1474
1475         if (mode == CNTR_MODE_R) {
1476                 ret = get_all_cpu_total(cntr) - *z_val;
1477         } else if (mode == CNTR_MODE_W) {
1478                 /* A write can only zero the counter */
1479                 if (data == 0)
1480                         *z_val = get_all_cpu_total(cntr);
1481                 else
1482                         dd_dev_err(dd, "Per CPU cntrs can only be zeroed");
1483         } else {
1484                 dd_dev_err(dd, "Invalid cntr sw cpu access mode");
1485                 return 0;
1486         }
1487
1488         return ret;
1489 }
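/*
 * Per-CPU counters have no hardware reset, so "zeroing" is emulated: a
 * write of 0 snapshots the current per-CPU total into *z_val and later
 * reads return (total - *z_val).  Non-zero writes are rejected.
 */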
1490
1491 static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
1492                               void *context, int vl, int mode, u64 data)
1493 {
1494         struct hfi1_devdata *dd = context;
1495
1496         return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl,
1497                               mode, data);
1498 }
1499
1500 static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
1501                               void *context, int vl, int mode, u64 data)
1502 {
1503         struct hfi1_devdata *dd = context;
1504
1505         return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl,
1506                               mode, data);
1507 }
1508
1509 static u64 access_sw_pio_wait(const struct cntr_entry *entry,
1510                               void *context, int vl, int mode, u64 data)
1511 {
1512         struct hfi1_devdata *dd = context;
1513
1514         return dd->verbs_dev.n_piowait;
1515 }
1516
1517 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
1518                               void *context, int vl, int mode, u64 data)
1519 {
1520         struct hfi1_devdata *dd = context;
1521
1522         return dd->verbs_dev.n_txwait;
1523 }
1524
1525 static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
1526                                void *context, int vl, int mode, u64 data)
1527 {
1528         struct hfi1_devdata *dd = context;
1529
1530         return dd->verbs_dev.n_kmem_wait;
1531 }
1532
1533 static u64 access_sw_send_schedule(const struct cntr_entry *entry,
1534                                void *context, int vl, int mode, u64 data)
1535 {
1536         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1537
1538         return dd->verbs_dev.n_send_schedule;
1539 }
1540
1541 #define def_access_sw_cpu(cntr) \
1542 static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry,               \
1543                               void *context, int vl, int mode, u64 data)      \
1544 {                                                                             \
1545         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;        \
1546         return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,           \
1547                               ppd->ibport_data.cntr, vl,                      \
1548                               mode, data);                                    \
1549 }
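/*
 * For example, def_access_sw_cpu(rc_acks) generates access_sw_cpu_rc_acks(),
 * which applies read_write_cpu() to the per-CPU ppd->ibport_data.rc_acks
 * counter with ppd->ibport_data.z_rc_acks as its zero baseline.
 */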
1550
1551 def_access_sw_cpu(rc_acks);
1552 def_access_sw_cpu(rc_qacks);
1553 def_access_sw_cpu(rc_delayed_comp);
1554
1555 #define def_access_ibp_counter(cntr) \
1556 static u64 access_ibp_##cntr(const struct cntr_entry *entry,                  \
1557                                 void *context, int vl, int mode, u64 data)    \
1558 {                                                                             \
1559         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;        \
1560                                                                               \
1561         if (vl != CNTR_INVALID_VL)                                            \
1562                 return 0;                                                     \
1563                                                                               \
1564         return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,            \
1565                              mode, data);                                     \
1566 }
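/*
 * For example, def_access_ibp_counter(loop_pkts) generates
 * access_ibp_loop_pkts(), which wraps read_write_sw() around
 * ppd->ibport_data.n_loop_pkts (note the n_ prefix added by the macro)
 * and rejects per-VL requests.
 */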
1567
1568 def_access_ibp_counter(loop_pkts);
1569 def_access_ibp_counter(rc_resends);
1570 def_access_ibp_counter(rnr_naks);
1571 def_access_ibp_counter(other_naks);
1572 def_access_ibp_counter(rc_timeouts);
1573 def_access_ibp_counter(pkt_drops);
1574 def_access_ibp_counter(dmawait);
1575 def_access_ibp_counter(rc_seqnak);
1576 def_access_ibp_counter(rc_dupreq);
1577 def_access_ibp_counter(rdma_seq);
1578 def_access_ibp_counter(unaligned);
1579 def_access_ibp_counter(seq_naks);
1580
1581 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
1582 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
1583 [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
1584                         CNTR_NORMAL),
1585 [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
1586                         CNTR_NORMAL),
1587 [C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
1588                         RCV_TID_FLOW_GEN_MISMATCH_CNT,
1589                         CNTR_NORMAL),
1590 [C_RX_CTX_RHQS] = RXE32_DEV_CNTR_ELEM(RxCtxRHQS, RCV_CONTEXT_RHQ_STALL,
1591                         CNTR_NORMAL),
1592 [C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
1593                         CNTR_NORMAL),
1594 [C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
1595                         RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
1596 [C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
1597                         CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
1598 [C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
1599                         CNTR_NORMAL),
1600 [C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
1601                         CNTR_NORMAL),
1602 [C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
1603                         CNTR_NORMAL),
1604 [C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
1605                         CNTR_NORMAL),
1606 [C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
1607                         CNTR_NORMAL),
1608 [C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
1609                         CNTR_NORMAL),
1610 [C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
1611                         CCE_RCV_URGENT_INT_CNT, CNTR_NORMAL),
1612 [C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
1613                         CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
1614 [C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
1615                               CNTR_SYNTH),
1616 [C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
1617 [C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
1618                                  CNTR_SYNTH),
1619 [C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
1620                                   CNTR_SYNTH),
1621 [C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
1622                                   CNTR_SYNTH),
1623 [C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
1624                                    DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
1625 [C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
1626                                   DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
1627                                   CNTR_SYNTH),
1628 [C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
1629                                 DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
1630 [C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
1631                                CNTR_SYNTH),
1632 [C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
1633                               CNTR_SYNTH),
1634 [C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
1635                                CNTR_SYNTH),
1636 [C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
1637                                  CNTR_SYNTH),
1638 [C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
1639                                 CNTR_SYNTH),
1640 [C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
1641                                 CNTR_SYNTH),
1642 [C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
1643                                CNTR_SYNTH),
1644 [C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
1645                                  CNTR_SYNTH | CNTR_VL),
1646 [C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
1647                                 CNTR_SYNTH | CNTR_VL),
1648 [C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
1649 [C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
1650                                  CNTR_SYNTH | CNTR_VL),
1651 [C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
1652 [C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
1653                                  CNTR_SYNTH | CNTR_VL),
1654 [C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
1655                               CNTR_SYNTH),
1656 [C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
1657                                  CNTR_SYNTH | CNTR_VL),
1658 [C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
1659                                 CNTR_SYNTH),
1660 [C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
1661                                    CNTR_SYNTH | CNTR_VL),
1662 [C_DC_TOTAL_CRC] =
1663         DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
1664                          CNTR_SYNTH),
1665 [C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
1666                                   CNTR_SYNTH),
1667 [C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
1668                                   CNTR_SYNTH),
1669 [C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
1670                                   CNTR_SYNTH),
1671 [C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
1672                                   CNTR_SYNTH),
1673 [C_DC_CRC_MULT_LN] =
1674         DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
1675                          CNTR_SYNTH),
1676 [C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
1677                                     CNTR_SYNTH),
1678 [C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
1679                                     CNTR_SYNTH),
1680 [C_DC_SEQ_CRC_CNT] =
1681         DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
1682                          CNTR_SYNTH),
1683 [C_DC_ESC0_ONLY_CNT] =
1684         DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
1685                          CNTR_SYNTH),
1686 [C_DC_ESC0_PLUS1_CNT] =
1687         DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
1688                          CNTR_SYNTH),
1689 [C_DC_ESC0_PLUS2_CNT] =
1690         DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
1691                          CNTR_SYNTH),
1692 [C_DC_REINIT_FROM_PEER_CNT] =
1693         DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
1694                          CNTR_SYNTH),
1695 [C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
1696                                   CNTR_SYNTH),
1697 [C_DC_MISC_FLG_CNT] =
1698         DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
1699                          CNTR_SYNTH),
1700 [C_DC_PRF_GOOD_LTP_CNT] =
1701         DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
1702 [C_DC_PRF_ACCEPTED_LTP_CNT] =
1703         DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
1704                          CNTR_SYNTH),
1705 [C_DC_PRF_RX_FLIT_CNT] =
1706         DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
1707 [C_DC_PRF_TX_FLIT_CNT] =
1708         DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
1709 [C_DC_PRF_CLK_CNTR] =
1710         DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
1711 [C_DC_PG_DBG_FLIT_CRDTS_CNT] =
1712         DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
1713 [C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
1714         DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
1715                          CNTR_SYNTH),
1716 [C_DC_PG_STS_TX_SBE_CNT] =
1717         DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT, CNTR_SYNTH),
1718 [C_DC_PG_STS_TX_MBE_CNT] =
1719         DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
1720                          CNTR_SYNTH),
1721 [C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
1722                             access_sw_cpu_intr),
1723 [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
1724                             access_sw_cpu_rcv_limit),
1725 [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
1726                             access_sw_vtx_wait),
1727 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
1728                             access_sw_pio_wait),
1729 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
1730                             access_sw_kmem_wait),
1731 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
1732                             access_sw_send_schedule),
1733 };
1734
1735 static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
1736 [C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
1737                         CNTR_NORMAL),
1738 [C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
1739                         CNTR_NORMAL),
1740 [C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
1741                         CNTR_NORMAL),
1742 [C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
1743                         CNTR_NORMAL),
1744 [C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
1745                         CNTR_NORMAL),
1746 [C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
1747                         CNTR_NORMAL),
1748 [C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
1749                         CNTR_NORMAL),
1750 [C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
1751 [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
1752 [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
1753 [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
1754                         CNTR_SYNTH | CNTR_VL),
1755 [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
1756                         CNTR_SYNTH | CNTR_VL),
1757 [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
1758                         CNTR_SYNTH | CNTR_VL),
1759 [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
1760 [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
1761 [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1762                         access_sw_link_dn_cnt),
1763 [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1764                         access_sw_link_up_cnt),
1765 [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1766                         access_sw_xmit_discards),
1767 [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
1768                         CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
1769                         access_sw_xmit_discards),
1770 [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
1771                         access_xmit_constraint_errs),
1772 [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
1773                         access_rcv_constraint_errs),
1774 [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
1775 [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
1776 [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
1777 [C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks),
1778 [C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts),
1779 [C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops),
1780 [C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait),
1781 [C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak),
1782 [C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq),
1783 [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
1784 [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
1785 [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
1786 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
1787                                access_sw_cpu_rc_acks),
1788 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
1789                                access_sw_cpu_rc_qacks),
1790 [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
1791                                access_sw_cpu_rc_delayed_comp),
1792 [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
1793 [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
1794 [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
1795 [OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7),
1796 [OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9),
1797 [OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11),
1798 [OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13),
1799 [OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15),
1800 [OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17),
1801 [OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19),
1802 [OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21),
1803 [OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23),
1804 [OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25),
1805 [OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27),
1806 [OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29),
1807 [OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31),
1808 [OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33),
1809 [OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35),
1810 [OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37),
1811 [OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39),
1812 [OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41),
1813 [OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43),
1814 [OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45),
1815 [OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47),
1816 [OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49),
1817 [OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51),
1818 [OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53),
1819 [OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55),
1820 [OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57),
1821 [OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59),
1822 [OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61),
1823 [OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63),
1824 [OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65),
1825 [OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67),
1826 [OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69),
1827 [OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71),
1828 [OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73),
1829 [OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75),
1830 [OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77),
1831 [OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79),
1832 [OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81),
1833 [OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83),
1834 [OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85),
1835 [OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87),
1836 [OVR_LBL(88)] = OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89),
1837 [OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91),
1838 [OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93),
1839 [OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95),
1840 [OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97),
1841 [OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99),
1842 [OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101),
1843 [OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103),
1844 [OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105),
1845 [OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107),
1846 [OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109),
1847 [OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111),
1848 [OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113),
1849 [OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115),
1850 [OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117),
1851 [OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119),
1852 [OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121),
1853 [OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123),
1854 [OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125),
1855 [OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127),
1856 [OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129),
1857 [OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131),
1858 [OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133),
1859 [OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135),
1860 [OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137),
1861 [OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139),
1862 [OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141),
1863 [OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143),
1864 [OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145),
1865 [OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147),
1866 [OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149),
1867 [OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151),
1868 [OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = OVR_ELM(153),
1869 [OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155),
1870 [OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157),
1871 [OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159),
1872 };
1873
1874 /* ======================================================================== */
1875
1876 /* return true if this is chip revision a0 */
1877 int is_a0(struct hfi1_devdata *dd)
1878 {
1879         return ((dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1880                         & CCE_REVISION_CHIP_REV_MINOR_MASK) == 0;
1881 }
1882
1883 /* return true if this is chip revision a */
1884 int is_ax(struct hfi1_devdata *dd)
1885 {
1886         u8 chip_rev_minor =
1887                 dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1888                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
1889         return (chip_rev_minor & 0xf0) == 0;
1890 }
1891
1892 /* return true if this is chip revision b */
1893 int is_bx(struct hfi1_devdata *dd)
1894 {
1895         u8 chip_rev_minor =
1896                 dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1897                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
1898         return !!(chip_rev_minor & 0x10);
1899 }
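/*
 * In the checks above, the CCE_REVISION minor field seems to encode the
 * stepping: the low nibble is the within-step revision (0 for A0) and
 * bit 4 marks B-step parts, which is why is_ax() tests the upper nibble
 * for zero and is_bx() tests bit 0x10.
 */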
1900
1901 /*
1902  * Append string s to buffer buf.  Arguments curp and lenp point to the
1903  * current position and remaining length, respectively.
1904  *
1905  * return 0 on success, 1 on out of room
1906  */
1907 static int append_str(char *buf, char **curp, int *lenp, const char *s)
1908 {
1909         char *p = *curp;
1910         int len = *lenp;
1911         int result = 0; /* success */
1912         char c;
1913
1914         /* add a comma, if not first in the buffer */
1915         if (p != buf) {
1916                 if (len == 0) {
1917                         result = 1; /* out of room */
1918                         goto done;
1919                 }
1920                 *p++ = ',';
1921                 len--;
1922         }
1923
1924         /* copy the string */
1925         while ((c = *s++) != 0) {
1926                 if (len == 0) {
1927                         result = 1; /* out of room */
1928                         goto done;
1929                 }
1930                 *p++ = c;
1931                 len--;
1932         }
1933
1934 done:
1935         /* write return values */
1936         *curp = p;
1937         *lenp = len;
1938
1939         return result;
1940 }
1941
1942 /*
1943  * Using the given flag table, print a comma separated string into
1944  * the buffer.  End in '*' if the buffer is too short.
1945  */
1946 static char *flag_string(char *buf, int buf_len, u64 flags,
1947                                 struct flag_table *table, int table_size)
1948 {
1949         char extra[32];
1950         char *p = buf;
1951         int len = buf_len;
1952         int no_room = 0;
1953         int i;
1954
1955         /* make sure there are at least 2 bytes so we can form "*" */
1956         if (len < 2)
1957                 return "";
1958
1959         len--;  /* leave room for a nul */
1960         for (i = 0; i < table_size; i++) {
1961                 if (flags & table[i].flag) {
1962                         no_room = append_str(buf, &p, &len, table[i].str);
1963                         if (no_room)
1964                                 break;
1965                         flags &= ~table[i].flag;
1966                 }
1967         }
1968
1969         /* any undocumented bits left? */
1970         if (!no_room && flags) {
1971                 snprintf(extra, sizeof(extra), "bits 0x%llx", flags);
1972                 no_room = append_str(buf, &p, &len, extra);
1973         }
1974
1975         /* add * if ran out of room */
1976         if (no_room) {
1977                 /* may need to back up to add space for a '*' */
1978                 if (len == 0)
1979                         --p;
1980                 *p++ = '*';
1981         }
1982
1983         /* add final nul - space already allocated above */
1984         *p = 0;
1985         return buf;
1986 }
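/*
 * For illustration, with a hypothetical table entry
 * { .flag = 0x1, .str = "FlagA" } and flags == 0x5, flag_string() would
 * produce "FlagA,bits 0x4" (the undocumented 0x4 bit reported in hex),
 * appending a trailing '*' only if the buffer ran out of room.
 */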
1987
1988 /* first 8 CCE error interrupt source names */
1989 static const char * const cce_misc_names[] = {
1990         "CceErrInt",            /* 0 */
1991         "RxeErrInt",            /* 1 */
1992         "MiscErrInt",           /* 2 */
1993         "Reserved3",            /* 3 */
1994         "PioErrInt",            /* 4 */
1995         "SDmaErrInt",           /* 5 */
1996         "EgressErrInt",         /* 6 */
1997         "TxeErrInt"             /* 7 */
1998 };
1999
2000 /*
2001  * Return the miscellaneous error interrupt name.
2002  */
2003 static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
2004 {
2005         if (source < ARRAY_SIZE(cce_misc_names))
2006                 strncpy(buf, cce_misc_names[source], bsize);
2007         else
2008                 snprintf(buf,
2009                         bsize,
2010                         "Reserved%u",
2011                         source + IS_GENERAL_ERR_START);
2012
2013         return buf;
2014 }
2015
2016 /*
2017  * Return the SDMA engine error interrupt name.
2018  */
2019 static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source)
2020 {
2021         snprintf(buf, bsize, "SDmaEngErrInt%u", source);
2022         return buf;
2023 }
2024
2025 /*
2026  * Return the send context error interrupt name.
2027  */
2028 static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source)
2029 {
2030         snprintf(buf, bsize, "SendCtxtErrInt%u", source);
2031         return buf;
2032 }
2033
2034 static const char * const various_names[] = {
2035         "PbcInt",
2036         "GpioAssertInt",
2037         "Qsfp1Int",
2038         "Qsfp2Int",
2039         "TCritInt"
2040 };
2041
2042 /*
2043  * Return the various interrupt name.
2044  */
2045 static char *is_various_name(char *buf, size_t bsize, unsigned int source)
2046 {
2047         if (source < ARRAY_SIZE(various_names))
2048                 strncpy(buf, various_names[source], bsize);
2049         else
2050                 snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
2051         return buf;
2052 }
2053
2054 /*
2055  * Return the DC interrupt name.
2056  */
2057 static char *is_dc_name(char *buf, size_t bsize, unsigned int source)
2058 {
2059         static const char * const dc_int_names[] = {
2060                 "common",
2061                 "lcb",
2062                 "8051",
2063                 "lbm"   /* local block merge */
2064         };
2065
2066         if (source < ARRAY_SIZE(dc_int_names))
2067                 snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]);
2068         else
2069                 snprintf(buf, bsize, "DCInt%u", source);
2070         return buf;
2071 }
2072
2073 static const char * const sdma_int_names[] = {
2074         "SDmaInt",
2075         "SdmaIdleInt",
2076         "SdmaProgressInt",
2077 };
2078
2079 /*
2080  * Return the SDMA engine interrupt name.
2081  */
2082 static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source)
2083 {
2084         /* what interrupt */
2085         unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
2086         /* which engine */
2087         unsigned int which = source % TXE_NUM_SDMA_ENGINES;
2088
2089         if (likely(what < 3))
2090                 snprintf(buf, bsize, "%s%u", sdma_int_names[what], which);
2091         else
2092                 snprintf(buf, bsize, "Invalid SDMA interrupt %u", source);
2093         return buf;
2094 }
2095
2096 /*
2097  * Return the receive available interrupt name.
2098  */
2099 static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source)
2100 {
2101         snprintf(buf, bsize, "RcvAvailInt%u", source);
2102         return buf;
2103 }
2104
2105 /*
2106  * Return the receive urgent interrupt name.
2107  */
2108 static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source)
2109 {
2110         snprintf(buf, bsize, "RcvUrgentInt%u", source);
2111         return buf;
2112 }
2113
2114 /*
2115  * Return the send credit interrupt name.
2116  */
2117 static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source)
2118 {
2119         snprintf(buf, bsize, "SendCreditInt%u", source);
2120         return buf;
2121 }
2122
2123 /*
2124  * Return the reserved interrupt name.
2125  */
2126 static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
2127 {
2128         snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START);
2129         return buf;
2130 }
2131
2132 static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
2133 {
2134         return flag_string(buf, buf_len, flags,
2135                         cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
2136 }
2137
2138 static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
2139 {
2140         return flag_string(buf, buf_len, flags,
2141                         rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
2142 }
2143
2144 static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
2145 {
2146         return flag_string(buf, buf_len, flags, misc_err_status_flags,
2147                         ARRAY_SIZE(misc_err_status_flags));
2148 }
2149
2150 static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
2151 {
2152         return flag_string(buf, buf_len, flags,
2153                         pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
2154 }
2155
2156 static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
2157 {
2158         return flag_string(buf, buf_len, flags,
2159                         sdma_err_status_flags,
2160                         ARRAY_SIZE(sdma_err_status_flags));
2161 }
2162
2163 static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
2164 {
2165         return flag_string(buf, buf_len, flags,
2166                 egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
2167 }
2168
2169 static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
2170 {
2171         return flag_string(buf, buf_len, flags,
2172                 egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
2173 }
2174
2175 static char *send_err_status_string(char *buf, int buf_len, u64 flags)
2176 {
2177         return flag_string(buf, buf_len, flags,
2178                         send_err_status_flags,
2179                         ARRAY_SIZE(send_err_status_flags));
2180 }
2181
2182 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2183 {
2184         char buf[96];
2185
2186         /*
2187          * For most of these errors, there is nothing that can be done except
2188          * report or record it.
2189          */
2190         dd_dev_info(dd, "CCE Error: %s\n",
2191                 cce_err_status_string(buf, sizeof(buf), reg));
2192
2193         if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK)
2194                         && is_a0(dd)
2195                         && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
2196                 /*
2197                  * this error requires a manual drop into SPC freeze
2198                  * mode, then a fix up
2199                  */
2198                 start_freeze_handling(dd->pport, FREEZE_SELF);
2199         }
2200 }
2201
2202 /*
2203  * Check counters for receive errors that do not have an interrupt
2204  * associated with them.
2205  */
2206 #define RCVERR_CHECK_TIME 10
2207 static void update_rcverr_timer(unsigned long opaque)
2208 {
2209         struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
2210         struct hfi1_pportdata *ppd = dd->pport;
2211         u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2212
2213         if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
2214                 ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
2215                 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
2216                 set_link_down_reason(ppd,
2217                   OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
2218                         OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
2219                 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
2220         }
2221         dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
2222
2223         mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2224 }
2225
2226 static int init_rcverr(struct hfi1_devdata *dd)
2227 {
2228         setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd);
2229         /* Assume the hardware counter has been reset */
2230         dd->rcv_ovfl_cnt = 0;
2231         return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2232 }
2233
2234 static void free_rcverr(struct hfi1_devdata *dd)
2235 {
2236         if (dd->rcverr_timer.data)
2237                 del_timer_sync(&dd->rcverr_timer);
2238         dd->rcverr_timer.data = 0;
2239 }
2240
2241 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2242 {
2243         char buf[96];
2244
2245         dd_dev_info(dd, "Receive Error: %s\n",
2246                 rxe_err_status_string(buf, sizeof(buf), reg));
2247
2248         if (reg & ALL_RXE_FREEZE_ERR) {
2249                 int flags = 0;
2250
2251                 /*
2252                  * Freeze mode recovery is disabled for the errors
2253                  * in RXE_FREEZE_ABORT_MASK
2254                  */
2255                 if (is_a0(dd) && (reg & RXE_FREEZE_ABORT_MASK))
2256                         flags = FREEZE_ABORT;
2257
2258                 start_freeze_handling(dd->pport, flags);
2259         }
2260 }
2261
2262 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2263 {
2264         char buf[96];
2265
2266         dd_dev_info(dd, "Misc Error: %s",
2267                 misc_err_status_string(buf, sizeof(buf), reg));
2268 }
2269
2270 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2271 {
2272         char buf[96];
2273
2274         dd_dev_info(dd, "PIO Error: %s\n",
2275                 pio_err_status_string(buf, sizeof(buf), reg));
2276
2277         if (reg & ALL_PIO_FREEZE_ERR)
2278                 start_freeze_handling(dd->pport, 0);
2279 }
2280
2281 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2282 {
2283         char buf[96];
2284
2285         dd_dev_info(dd, "SDMA Error: %s\n",
2286                 sdma_err_status_string(buf, sizeof(buf), reg));
2287
2288         if (reg & ALL_SDMA_FREEZE_ERR)
2289                 start_freeze_handling(dd->pport, 0);
2290 }
2291
2292 static void count_port_inactive(struct hfi1_devdata *dd)
2293 {
2294         struct hfi1_pportdata *ppd = dd->pport;
2295
2296         if (ppd->port_xmit_discards < ~(u64)0)
2297                 ppd->port_xmit_discards++;
2298 }
2299
2300 /*
2301  * We have had a "disallowed packet" error during egress. Determine the
2302  * integrity check which failed, and update the relevant error counter, etc.
2303  *
2304  * Note that the SEND_EGRESS_ERR_INFO register has only a single
2305  * bit of state per integrity check, and so we can miss the reason for an
2306  * egress error if more than one packet fails the same integrity check
2307  * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
2308  */
2309 static void handle_send_egress_err_info(struct hfi1_devdata *dd)
2310 {
2311         struct hfi1_pportdata *ppd = dd->pport;
2312         u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
2313         u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO);
2314         char buf[96];
2315
2316         /* clear down all observed info as quickly as possible after read */
2317         write_csr(dd, SEND_EGRESS_ERR_INFO, info);
2318
2319         dd_dev_info(dd,
2320                 "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
2321                 info, egress_err_info_string(buf, sizeof(buf), info), src);
2322
2323         /* Eventually add other counters for each bit */
2324
2325         if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
2326                 if (ppd->port_xmit_discards < ~(u64)0)
2327                         ppd->port_xmit_discards++;
2328         }
2329 }
2330
2331 /*
2332  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2333  * register. Does it represent a 'port inactive' error?
2334  */
2335 static inline int port_inactive_err(u64 posn)
2336 {
2337         return (posn >= SEES(TX_LINKDOWN) &&
2338                 posn <= SEES(TX_INCORRECT_LINK_STATE));
2339 }
2340
2341 /*
2342  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2343  * register. Does it represent a 'disallowed packet' error?
2344  */
2345 static inline int disallowed_pkt_err(u64 posn)
2346 {
2347         return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
2348                 posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
2349 }
2350
2351 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2352 {
2353         u64 reg_copy = reg, handled = 0;
2354         char buf[96];
2355
2356         if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
2357                 start_freeze_handling(dd->pport, 0);
2358         if (is_a0(dd) && (reg &
2359                     SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
2360                     && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
2361                 start_freeze_handling(dd->pport, 0);
2362
2363         while (reg_copy) {
2364                 int posn = fls64(reg_copy);
2365                 /*
2366                  * fls64() returns a 1-based offset, but we generally
2367                  * want 0-based offsets.
2368                  */
2369                 int shift = posn - 1;
2370
2371                 if (port_inactive_err(shift)) {
2372                         count_port_inactive(dd);
2373                         handled |= (1ULL << shift);
2374                 } else if (disallowed_pkt_err(shift)) {
2375                         handle_send_egress_err_info(dd);
2376                         handled |= (1ULL << shift);
2377                 }
2378                 clear_bit(shift, (unsigned long *)&reg_copy);
2379         }
2380
2381         reg &= ~handled;
2382
2383         if (reg)
2384                 dd_dev_info(dd, "Egress Error: %s\n",
2385                         egress_err_status_string(buf, sizeof(buf), reg));
2386 }
2387
2388 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2389 {
2390         char buf[96];
2391
2392         dd_dev_info(dd, "Send Error: %s\n",
2393                 send_err_status_string(buf, sizeof(buf), reg));
2395 }
2396
2397 /*
2398  * The maximum number of times the error clear down will loop before
2399  * blocking a repeating error.  This value is arbitrary.
2400  */
2401 #define MAX_CLEAR_COUNT 20
2402
2403 /*
2404  * Clear and handle an error register.  All error interrupts are funneled
2405  * through here to have a central location to correctly handle single-
2406  * or multi-shot errors.
2407  *
2408  * For non per-context registers, call this routine with a context value
2409  * of 0 so the per-context offset is zero.
2410  *
2411  * If the handler loops too many times, assume that something is wrong
2412  * and can't be fixed, so mask the error bits.
2413  */
2414 static void interrupt_clear_down(struct hfi1_devdata *dd,
2415                                  u32 context,
2416                                  const struct err_reg_info *eri)
2417 {
2418         u64 reg;
2419         u32 count;
2420
2421         /* read in a loop until no more errors are seen */
2422         count = 0;
2423         while (1) {
2424                 reg = read_kctxt_csr(dd, context, eri->status);
2425                 if (reg == 0)
2426                         break;
2427                 write_kctxt_csr(dd, context, eri->clear, reg);
2428                 if (likely(eri->handler))
2429                         eri->handler(dd, context, reg);
2430                 count++;
2431                 if (count > MAX_CLEAR_COUNT) {
2432                         u64 mask;
2433
2434                         dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
2435                                 eri->desc, reg);
2436                         /*
2437                          * Read-modify-write so any other masked bits
2438                          * remain masked.
2439                          */
2440                         mask = read_kctxt_csr(dd, context, eri->mask);
2441                         mask &= ~reg;
2442                         write_kctxt_csr(dd, context, eri->mask, mask);
2443                         break;
2444                 }
2445         }
2446 }
2447
2448 /*
2449  * CCE block "misc" interrupt.  Source is < 16.
2450  */
2451 static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
2452 {
2453         const struct err_reg_info *eri = &misc_errs[source];
2454
2455         if (eri->handler) {
2456                 interrupt_clear_down(dd, 0, eri);
2457         } else {
2458                 dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
2459                         source);
2460         }
2461 }
2462
2463 static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
2464 {
2465         return flag_string(buf, buf_len, flags,
2466                         sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
2467 }
2468
2469 /*
2470  * Send context error interrupt.  Source (hw_context) is < 160.
2471  *
2472  * All send context errors cause the send context to halt.  The normal
2473  * clear-down mechanism cannot be used because we cannot clear the
2474  * error bits until several other long-running items are done first.
2475  * This is OK because with the context halted, nothing else is going
2476  * to happen on it anyway.
2477  */
2478 static void is_sendctxt_err_int(struct hfi1_devdata *dd,
2479                                 unsigned int hw_context)
2480 {
2481         struct send_context_info *sci;
2482         struct send_context *sc;
2483         char flags[96];
2484         u64 status;
2485         u32 sw_index;
2486
2487         sw_index = dd->hw_to_sw[hw_context];
2488         if (sw_index >= dd->num_send_contexts) {
2489                 dd_dev_err(dd,
2490                         "out of range sw index %u for send context %u\n",
2491                         sw_index, hw_context);
2492                 return;
2493         }
2494         sci = &dd->send_contexts[sw_index];
2495         sc = sci->sc;
2496         if (!sc) {
2497                 dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
2498                         sw_index, hw_context);
2499                 return;
2500         }
2501
2502         /* tell the software that a halt has begun */
2503         sc_stop(sc, SCF_HALTED);
2504
2505         status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
2506
2507         dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
2508                 send_context_err_status_string(flags, sizeof(flags), status));
2509
2510         if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
2511                 handle_send_egress_err_info(dd);
2512
2513         /*
2514          * Automatically restart halted kernel contexts out of interrupt
2515          * context.  User contexts must ask the driver to restart the context.
2516          */
2517         if (sc->type != SC_USER)
2518                 queue_work(dd->pport->hfi1_wq, &sc->halt_work);
2519 }
2520
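/*
 * Per-engine SDMA error: log details when CONFIG_SDMA_VERBOSITY is set,
 * then hand the error status to the SDMA layer.
 */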
2521 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
2522                                 unsigned int source, u64 status)
2523 {
2524         struct sdma_engine *sde;
2525
2526         sde = &dd->per_sdma[source];
2527 #ifdef CONFIG_SDMA_VERBOSITY
2528         dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2529                    slashstrip(__FILE__), __LINE__, __func__);
2530         dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
2531                    sde->this_idx, source, (unsigned long long)status);
2532 #endif
2533         sdma_engine_error(sde, status);
2534 }
2535
2536 /*
2537  * CCE block SDMA error interrupt.  Source is < 16.
2538  */
2539 static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source)
2540 {
2541 #ifdef CONFIG_SDMA_VERBOSITY
2542         struct sdma_engine *sde = &dd->per_sdma[source];
2543
2544         dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2545                    slashstrip(__FILE__), __LINE__, __func__);
2546         dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx,
2547                    source);
2548         sdma_dumpstate(sde);
2549 #endif
2550         interrupt_clear_down(dd, source, &sdma_eng_err);
2551 }
2552
2553 /*
2554  * CCE block "various" interrupt.  Source is < 8.
2555  */
2556 static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
2557 {
2558         const struct err_reg_info *eri = &various_err[source];
2559
2560         /*
2561          * TCritInt cannot go through interrupt_clear_down()
2562          * because it is not a second tier interrupt. The handler
2563          * should be called directly.
2564          */
2565         if (source == TCRIT_INT_SOURCE)
2566                 handle_temp_err(dd);
2567         else if (eri->handler)
2568                 interrupt_clear_down(dd, 0, eri);
2569         else
2570                 dd_dev_info(dd,
2571                         "%s: Unimplemented/reserved interrupt %d\n",
2572                         __func__, source);
2573 }
2574
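/*
 * QSFP interrupt handler.  React to module presence changes (MODPRST_N)
 * and module interrupts (INT_N), then schedule the QSFP work item if a
 * cable is attached.
 */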
2575 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
2576 {
2577         /* source is always zero */
2578         struct hfi1_pportdata *ppd = dd->pport;
2579         unsigned long flags;
2580         u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
2581
2582         if (reg & QSFP_HFI0_MODPRST_N) {
2583
2584                 dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
2585                                 __func__);
2586
2587                 if (!qsfp_mod_present(ppd)) {
2588                         ppd->driver_link_ready = 0;
2589                         /*
2590                          * Cable removed, reset all our information about the
2591                          * cache and cable capabilities
2592                          */
2593
2594                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2595                         /*
2596                          * We don't set cache_refresh_required here as we expect
2597                          * an interrupt when a cable is inserted
2598                          */
2599                         ppd->qsfp_info.cache_valid = 0;
2600                         ppd->qsfp_info.qsfp_interrupt_functional = 0;
2601                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2602                                                 flags);
2603                         write_csr(dd,
2604                                         dd->hfi1_id ?
2605                                                 ASIC_QSFP2_INVERT :
2606                                                 ASIC_QSFP1_INVERT,
2607                                 qsfp_int_mgmt);
2608                         if (ppd->host_link_state == HLS_DN_POLL) {
2609                                 /*
2610                                  * The link is still in POLL. This means
2611                                  * that the normal link down processing
2612                                  * will not happen. We have to do it here
2613                                  * before turning the DC off.
2614                                  */
2615                                 queue_work(ppd->hfi1_wq, &ppd->link_down_work);
2616                         }
2617                 } else {
2618                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2619                         ppd->qsfp_info.cache_valid = 0;
2620                         ppd->qsfp_info.cache_refresh_required = 1;
2621                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2622                                                 flags);
2623
2624                         qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
2625                         write_csr(dd,
2626                                         dd->hfi1_id ?
2627                                                 ASIC_QSFP2_INVERT :
2628                                                 ASIC_QSFP1_INVERT,
2629                                 qsfp_int_mgmt);
2630                 }
2631         }
2632
2633         if (reg & QSFP_HFI0_INT_N) {
2634
2635                 dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
2636                                 __func__);
2637                 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2638                 ppd->qsfp_info.check_interrupt_flags = 1;
2639                 ppd->qsfp_info.qsfp_interrupt_functional = 1;
2640                 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
2641         }
2642
2643         /* Schedule the QSFP work only if there is a cable attached. */
2644         if (qsfp_mod_present(ppd))
2645                 queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
2646 }
2647
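/* Ask the 8051 to grant the host access to the LCB CSRs. */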
2648 static int request_host_lcb_access(struct hfi1_devdata *dd)
2649 {
2650         int ret;
2651
2652         ret = do_8051_command(dd, HCMD_MISC,
2653                 (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2654                 NULL);
2655         if (ret != HCMD_SUCCESS) {
2656                 dd_dev_err(dd, "%s: command failed with error %d\n",
2657                         __func__, ret);
2658         }
2659         return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2660 }
2661
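/* Return LCB CSR access to the 8051. */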
2662 static int request_8051_lcb_access(struct hfi1_devdata *dd)
2663 {
2664         int ret;
2665
2666         ret = do_8051_command(dd, HCMD_MISC,
2667                 (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2668                 NULL);
2669         if (ret != HCMD_SUCCESS) {
2670                 dd_dev_err(dd, "%s: command failed with error %d\n",
2671                         __func__, ret);
2672         }
2673         return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2674 }
2675
2676 /*
2677  * Set the LCB selector - allow host access.  The DCC selector always
2678  * points to the host.
2679  */
2680 static inline void set_host_lcb_access(struct hfi1_devdata *dd)
2681 {
2682         write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2683                                 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
2684                                 | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
2685 }
2686
2687 /*
2688  * Clear the LCB selector - allow 8051 access.  The DCC selector always
2689  * points to the host.
2690  */
2691 static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
2692 {
2693         write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2694                                 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
2695 }
2696
2697 /*
2698  * Acquire LCB access from the 8051.  If the host already has access,
2699  * just increment a counter.  Otherwise, inform the 8051 that the
2700  * host is taking access.
2701  *
2702  * Returns:
2703  *      0 on success
2704  *      -EBUSY if the 8051 has control and cannot be disturbed
2705  *      -errno if unable to acquire access from the 8051
2706  */
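/*
 * Typical usage (sketch, illustrative only):
 *
 *	if (!acquire_lcb_access(dd, 1)) {
 *		... read/write LCB CSRs ...
 *		release_lcb_access(dd, 1);
 *	}
 */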
2707 int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2708 {
2709         struct hfi1_pportdata *ppd = dd->pport;
2710         int ret = 0;
2711
2712         /*
2713          * Use the host link state lock so the operation of this routine
2714          * { link state check, selector change, count increment } can occur
2715          * as a unit against a link state change.  Otherwise there is a
2716          * race between the state change and the count increment.
2717          */
2718         if (sleep_ok) {
2719                 mutex_lock(&ppd->hls_lock);
2720         } else {
2721                 while (!mutex_trylock(&ppd->hls_lock))
2722                         udelay(1);
2723         }
2724
2725         /* this access is valid only when the link is up */
2726         if ((ppd->host_link_state & HLS_UP) == 0) {
2727                 dd_dev_info(dd, "%s: link state %s not up\n",
2728                         __func__, link_state_name(ppd->host_link_state));
2729                 ret = -EBUSY;
2730                 goto done;
2731         }
2732
2733         if (dd->lcb_access_count == 0) {
2734                 ret = request_host_lcb_access(dd);
2735                 if (ret) {
2736                         dd_dev_err(dd,
2737                                 "%s: unable to acquire LCB access, err %d\n",
2738                                 __func__, ret);
2739                         goto done;
2740                 }
2741                 set_host_lcb_access(dd);
2742         }
2743         dd->lcb_access_count++;
2744 done:
2745         mutex_unlock(&ppd->hls_lock);
2746         return ret;
2747 }
2748
2749 /*
2750  * Release LCB access by decrementing the use count.  If the count is moving
2751  * from 1 to 0, inform the 8051 that it has control back.
2752  *
2753  * Returns:
2754  *      0 on success
2755  *      -errno if unable to release access to the 8051
2756  */
2757 int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2758 {
2759         int ret = 0;
2760
2761         /*
2762          * Use the host link state lock because the acquire needed it.
2763          * Here, we only need to keep { selector change, count decrement }
2764          * as a unit.
2765          */
2766         if (sleep_ok) {
2767                 mutex_lock(&dd->pport->hls_lock);
2768         } else {
2769                 while (!mutex_trylock(&dd->pport->hls_lock))
2770                         udelay(1);
2771         }
2772
2773         if (dd->lcb_access_count == 0) {
2774                 dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
2775                         __func__);
2776                 goto done;
2777         }
2778
2779         if (dd->lcb_access_count == 1) {
2780                 set_8051_lcb_access(dd);
2781                 ret = request_8051_lcb_access(dd);
2782                 if (ret) {
2783                         dd_dev_err(dd,
2784                                 "%s: unable to release LCB access, err %d\n",
2785                                 __func__, ret);
2786                         /* restore host access if the grant didn't work */
2787                         set_host_lcb_access(dd);
2788                         goto done;
2789                 }
2790         }
2791         dd->lcb_access_count--;
2792 done:
2793         mutex_unlock(&dd->pport->hls_lock);
2794         return ret;
2795 }
2796
2797 /*
2798  * Initialize LCB access variables and state.  Called during driver load,
2799  * after most of the initialization is finished.
2800  *
2801  * The DC default is LCB access on for the host.  The driver defaults to
2802  * leaving access to the 8051.  Assign access now - this constrains the call
2803  * to this routine to be after all LCB set-up is done.  In particular, after
2804  * hfi1_init_dd() -> set_up_interrupts() -> clear_all_interrupts()
2805  */
2806 static void init_lcb_access(struct hfi1_devdata *dd)
2807 {
2808         dd->lcb_access_count = 0;
2809 }
2810
2811 /*
2812  * Write a response back to an 8051 request.
2813  */
2814 static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
2815 {
2816         write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
2817                 DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
2818                 | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
2819                 | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
2820 }
2821
2822 /*
2823  * Handle requests from the 8051.
2824  */
2825 static void handle_8051_request(struct hfi1_devdata *dd)
2826 {
2827         u64 reg;
2828         u16 data;
2829         u8 type;
2830
2831         reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
2832         if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
2833                 return; /* no request */
2834
2835         /* zero out COMPLETED so the response is seen */
2836         write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0);
2837
2838         /* extract request details */
2839         type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT)
2840                         & DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK;
2841         data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT)
2842                         & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK;
2843
2844         switch (type) {
2845         case HREQ_LOAD_CONFIG:
2846         case HREQ_SAVE_CONFIG:
2847         case HREQ_READ_CONFIG:
2848         case HREQ_SET_TX_EQ_ABS:
2849         case HREQ_SET_TX_EQ_REL:
2850         case HREQ_ENABLE:
2851                 dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
2852                         type);
2853                 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2854                 break;
2855
2856         case HREQ_CONFIG_DONE:
2857                 hreq_response(dd, HREQ_SUCCESS, 0);
2858                 break;
2859
2860         case HREQ_INTERFACE_TEST:
2861                 hreq_response(dd, HREQ_SUCCESS, data);
2862                 break;
2863
2864         default:
2865                 dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
2866                 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2867                 break;
2868         }
2869 }
2870
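/* Program the global credit limit, shared limit, and AU size. */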
2871 static void write_global_credit(struct hfi1_devdata *dd,
2872                                 u8 vau, u16 total, u16 shared)
2873 {
2874         write_csr(dd, SEND_CM_GLOBAL_CREDIT,
2875                 ((u64)total
2876                         << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
2877                 | ((u64)shared
2878                         << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
2879                 | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
2880 }
2881
2882 /*
2883  * Set up initial VL15 credits of the remote.  Assumes the rest of
2884  * the CM credit registers are zero from a previous global or credit reset.
2885  */
2886 void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
2887 {
2888         /* leave shared count at zero for both global and VL15 */
2889         write_global_credit(dd, vau, vl15buf, 0);
2890
2891         /* We may need some credits for another VL when sending packets
2892          * with the snoop interface. Dividing it down the middle for VL15
2893          * and VL0 should suffice.
2894          */
2895         if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
2896                 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
2897                     << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2898                 write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
2899                     << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
2900         } else {
2901                 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
2902                         << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2903         }
2904 }
2905
2906 /*
2907  * Zero all credit details from the previous connection and
2908  * reset the CM block's internal counters.
2909  */
2910 void reset_link_credits(struct hfi1_devdata *dd)
2911 {
2912         int i;
2913
2914         /* remove all previous VL credit limits */
2915         for (i = 0; i < TXE_NUM_DATA_VL; i++)
2916                 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
2917         write_csr(dd, SEND_CM_CREDIT_VL15, 0);
2918         write_global_credit(dd, 0, 0, 0);
2919         /* reset the CM block */
2920         pio_send_control(dd, PSC_CM_RESET);
2921 }
2922
2923 /* convert a vCU to a CU */
2924 static u32 vcu_to_cu(u8 vcu)
2925 {
2926         return 1 << vcu;
2927 }
2928
2929 /* convert a CU to a vCU */
2930 static u8 cu_to_vcu(u32 cu)
2931 {
2932         return ilog2(cu);
2933 }
2934
2935 /* convert a vAU to an AU */
2936 static u32 vau_to_au(u8 vau)
2937 {
2938         return 8 * (1 << vau);
2939 }
2940
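/* At link up, default the SM trap QP to QP0 and the SA QP to QP1. */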
2941 static void set_linkup_defaults(struct hfi1_pportdata *ppd)
2942 {
2943         ppd->sm_trap_qp = 0x0;
2944         ppd->sa_qp = 0x1;
2945 }
2946
2947 /*
2948  * Graceful LCB shutdown.  This leaves the LCB FIFOs in reset.
2949  */
2950 static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
2951 {
2952         u64 reg;
2953
2954         /* clear lcb run: LCB_CFG_RUN.EN = 0 */
2955         write_csr(dd, DC_LCB_CFG_RUN, 0);
2956         /* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
2957         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
2958                 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
2959         /* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
2960         dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
2961         reg = read_csr(dd, DCC_CFG_RESET);
2962         write_csr(dd, DCC_CFG_RESET,
2963                 reg
2964                 | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
2965                 | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
2966         (void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
2967         if (!abort) {
2968                 udelay(1);    /* must hold for the longer of 16cclks or 20ns */
2969                 write_csr(dd, DCC_CFG_RESET, reg);
2970                 write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
2971         }
2972 }
2973
2974 /*
2975  * This routine should be called after the link has been transitioned to
2976  * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
2977  * reset).
2978  *
2979  * The expectation is that the caller of this routine would have taken
2980  * care of properly transitioning the link into the correct state.
2981  */
2982 static void dc_shutdown(struct hfi1_devdata *dd)
2983 {
2984         unsigned long flags;
2985
2986         spin_lock_irqsave(&dd->dc8051_lock, flags);
2987         if (dd->dc_shutdown) {
2988                 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2989                 return;
2990         }
2991         dd->dc_shutdown = 1;
2992         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2993         /* Shutdown the LCB */
2994         lcb_shutdown(dd, 1);
2995         /* Going to OFFLINE would have caused the 8051 to put the
2996          * SerDes into reset already. We just need to shut down the
2997          * 8051 itself. */
2998         write_csr(dd, DC_DC8051_CFG_RST, 0x1);
2999 }
3000
3001 /* Calling this after the DC has been brought out of reset should not
3002  * do any damage. */
3003 static void dc_start(struct hfi1_devdata *dd)
3004 {
3005         unsigned long flags;
3006         int ret;
3007
3008         spin_lock_irqsave(&dd->dc8051_lock, flags);
3009         if (!dd->dc_shutdown)
3010                 goto done;
3011         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3012         /* Take the 8051 out of reset */
3013         write_csr(dd, DC_DC8051_CFG_RST, 0ull);
3014         /* Wait until 8051 is ready */
3015         ret = wait_fm_ready(dd, TIMEOUT_8051_START);
3016         if (ret) {
3017                 dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
3018                         __func__);
3019         }
3020         /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
3021         write_csr(dd, DCC_CFG_RESET, 0x10);
3022         /* lcb_shutdown() with abort=1 does not restore these */
3023         write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
3024         spin_lock_irqsave(&dd->dc8051_lock, flags);
3025         dd->dc_shutdown = 0;
3026 done:
3027         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3028 }
3029
3030 /*
3031  * These LCB adjustments are for the Aurora SerDes core in the FPGA.
3032  */
3033 static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
3034 {
3035         u64 rx_radr, tx_radr;
3036         u32 version;
3037
3038         if (dd->icode != ICODE_FPGA_EMULATION)
3039                 return;
3040
3041         /*
3042          * These LCB defaults on emulator _s are good, nothing to do here:
3043          *      LCB_CFG_TX_FIFOS_RADR
3044          *      LCB_CFG_RX_FIFOS_RADR
3045          *      LCB_CFG_LN_DCLK
3046          *      LCB_CFG_IGNORE_LOST_RCLK
3047          */
3048         if (is_emulator_s(dd))
3049                 return;
3050         /* else this is _p */
3051
3052         version = emulator_rev(dd);
3053         if (!is_a0(dd))
3054                 version = 0x2d; /* all B0 use 0x2d or higher settings */
3055
3056         if (version <= 0x12) {
3057                 /* release 0x12 and below */
3058
3059                 /*
3060                  * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9
3061                  * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9
3062                  * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa
3063                  */
3064                 rx_radr =
3065                       0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3066                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3067                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3068                 /*
3069                  * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default)
3070                  * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6
3071                  */
3072                 tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3073         } else if (version <= 0x18) {
3074                 /* release 0x13 up to 0x18 */
3075                 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3076                 rx_radr =
3077                       0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3078                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3079                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3080                 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3081         } else if (version == 0x19) {
3082                 /* release 0x19 */
3083                 /* LCB_CFG_RX_FIFOS_RADR = 0xa99 */
3084                 rx_radr =
3085                       0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3086                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3087                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3088                 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3089         } else if (version == 0x1a) {
3090                 /* release 0x1a */
3091                 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3092                 rx_radr =
3093                       0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3094                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3095                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3096                 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3097                 write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull);
3098         } else {
3099                 /* release 0x1b and higher */
3100                 /* LCB_CFG_RX_FIFOS_RADR = 0x877 */
3101                 rx_radr =
3102                       0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3103                     | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3104                     | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3105                 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3106         }
3107
3108         write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
3109         /* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
3110         write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
3111                 DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
3112         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
3113 }
3114
3115 /*
3116  * Handle a SMA idle message
3117  *
3118  * This is a work-queue function outside of the interrupt.
3119  */
3120 void handle_sma_message(struct work_struct *work)
3121 {
3122         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3123                                                         sma_message_work);
3124         struct hfi1_devdata *dd = ppd->dd;
3125         u64 msg;
3126         int ret;
3127
3128         /* msg is bytes 1-4 of the 40-bit idle message - the command code
3129          * is stripped off */
3130         ret = read_idle_sma(dd, &msg);
3131         if (ret)
3132                 return;
3133         dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg);
3134         /*
3135          * React to the SMA message.  Byte[1] (0 for us) is the command.
3136          */
3137         switch (msg & 0xff) {
3138         case SMA_IDLE_ARM:
3139                 /*
3140                  * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3141                  * State Transitions
3142                  *
3143                  * Only expected in INIT or ARMED, discard otherwise.
3144                  */
3145                 if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED))
3146                         ppd->neighbor_normal = 1;
3147                 break;
3148         case SMA_IDLE_ACTIVE:
3149                 /*
3150                  * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3151                  * State Transitions
3152                  *
3153                  * Can activate the node.  Discard otherwise.
3154                  */
3155                 if (ppd->host_link_state == HLS_UP_ARMED
3156                                         && ppd->is_active_optimize_enabled) {
3157                         ppd->neighbor_normal = 1;
3158                         ret = set_link_state(ppd, HLS_UP_ACTIVE);
3159                         if (ret)
3160                                 dd_dev_err(
3161                                         dd,
3162                                         "%s: received Active SMA idle message, couldn't set link to Active\n",
3163                                         __func__);
3164                 }
3165                 break;
3166         default:
3167                 dd_dev_err(dd,
3168                         "%s: received unexpected SMA idle message 0x%llx\n",
3169                         __func__, msg);
3170                 break;
3171         }
3172 }
3173
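/*
 * Read-modify-write RCV_CTRL under the rcvctrl lock: set the "add" bits
 * and clear the "clear" bits.
 */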
3174 static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear)
3175 {
3176         u64 rcvctrl;
3177         unsigned long flags;
3178
3179         spin_lock_irqsave(&dd->rcvctrl_lock, flags);
3180         rcvctrl = read_csr(dd, RCV_CTRL);
3181         rcvctrl |= add;
3182         rcvctrl &= ~clear;
3183         write_csr(dd, RCV_CTRL, rcvctrl);
3184         spin_unlock_irqrestore(&dd->rcvctrl_lock, flags);
3185 }
3186
3187 static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add)
3188 {
3189         adjust_rcvctrl(dd, add, 0);
3190 }
3191
3192 static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear)
3193 {
3194         adjust_rcvctrl(dd, 0, clear);
3195 }
3196
3197 /*
3198  * Called from all interrupt handlers to start handling an SPC freeze.
3199  */
3200 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
3201 {
3202         struct hfi1_devdata *dd = ppd->dd;
3203         struct send_context *sc;
3204         int i;
3205
3206         if (flags & FREEZE_SELF)
3207                 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3208
3209         /* enter frozen mode */
3210         dd->flags |= HFI1_FROZEN;
3211
3212         /* notify all SDMA engines that they are going into a freeze */
3213         sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
3214
3215         /* do halt pre-handling on all enabled send contexts */
3216         for (i = 0; i < dd->num_send_contexts; i++) {
3217                 sc = dd->send_contexts[i].sc;
3218                 if (sc && (sc->flags & SCF_ENABLED))
3219                         sc_stop(sc, SCF_FROZEN | SCF_HALTED);
3220         }
3221
3222         /* Send contexts are frozen. Notify user space. */
3223         hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT);
3224
3225         if (flags & FREEZE_ABORT) {
3226                 dd_dev_err(dd,
3227                            "Aborted freeze recovery. Please REBOOT system\n");
3228                 return;
3229         }
3230         /* queue non-interrupt handler */
3231         queue_work(ppd->hfi1_wq, &ppd->freeze_work);
3232 }
3233
3234 /*
3235  * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen,
3236  * depending on the "freeze" parameter.
3237  *
3238  * No need to return an error if it times out, our only option
3239  * is to proceed anyway.
3240  */
3241 static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
3242 {
3243         unsigned long timeout;
3244         u64 reg;
3245
3246         timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT);
3247         while (1) {
3248                 reg = read_csr(dd, CCE_STATUS);
3249                 if (freeze) {
3250                         /* waiting until all indicators are set */
3251                         if ((reg & ALL_FROZE) == ALL_FROZE)
3252                                 return; /* all done */
3253                 } else {
3254                         /* waiting until all indicators are clear */
3255                         if ((reg & ALL_FROZE) == 0)
3256                                 return; /* all done */
3257                 }
3258
3259                 if (time_after(jiffies, timeout)) {
3260                         dd_dev_err(dd,
3261                                 "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
3262                                 freeze ? "" : "un",
3263                                 reg & ALL_FROZE,
3264                                 freeze ? ALL_FROZE : 0ull);
3265                         return;
3266                 }
3267                 usleep_range(80, 120);
3268         }
3269 }
3270
3271 /*
3272  * Do all freeze handling for the RXE block.
3273  */
3274 static void rxe_freeze(struct hfi1_devdata *dd)
3275 {
3276         int i;
3277
3278         /* disable port */
3279         clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3280
3281         /* disable all receive contexts */
3282         for (i = 0; i < dd->num_rcv_contexts; i++)
3283                 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
3284 }
3285
3286 /*
3287  * Unfreeze handling for the RXE block - kernel contexts only.
3288  * This will also enable the port.  User contexts will do unfreeze
3289  * handling on a per-context basis as they call into the driver.
3290  *
3291  */
3292 static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
3293 {
3294         int i;
3295
3296         /* enable all kernel contexts */
3297         for (i = 0; i < dd->n_krcv_queues; i++)
3298                 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
3299
3300         /* enable port */
3301         add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3302 }
3303
3304 /*
3305  * Non-interrupt SPC freeze handling.
3306  *
3307  * This is a work-queue function outside of the triggering interrupt.
3308  */
3309 void handle_freeze(struct work_struct *work)
3310 {
3311         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3312                                                                 freeze_work);
3313         struct hfi1_devdata *dd = ppd->dd;
3314
3315         /* wait for freeze indicators on all affected blocks */
3316         dd_dev_info(dd, "Entering SPC freeze\n");
3317         wait_for_freeze_status(dd, 1);
3318
3319         /* SPC is now frozen */
3320
3321         /* do send PIO freeze steps */
3322         pio_freeze(dd);
3323
3324         /* do send DMA freeze steps */
3325         sdma_freeze(dd);
3326
3327         /* do send egress freeze steps - nothing to do */
3328
3329         /* do receive freeze steps */
3330         rxe_freeze(dd);
3331
3332         /*
3333          * Unfreeze the hardware - clear the freeze, wait for each
3334          * block's frozen bit to clear, then clear the frozen flag.
3335          */
3336         write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3337         wait_for_freeze_status(dd, 0);
3338
3339         if (is_a0(dd)) {
3340                 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3341                 wait_for_freeze_status(dd, 1);
3342                 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3343                 wait_for_freeze_status(dd, 0);
3344         }
3345
3346         /* do send PIO unfreeze steps for kernel contexts */
3347         pio_kernel_unfreeze(dd);
3348
3349         /* do send DMA unfreeze steps */
3350         sdma_unfreeze(dd);
3351
3352         /* do send egress unfreeze steps - nothing to do */
3353
3354         /* do receive unfreeze steps for kernel contexts */
3355         rxe_kernel_unfreeze(dd);
3356
3357         /*
3358          * The unfreeze procedure touches global device registers when
3359          * it disables and re-enables RXE. Mark the device unfrozen
3360          * after all that is done so other parts of the driver waiting
3361          * for the device to unfreeze don't do things out of order.
3362          *
3363          * The above implies that the meaning of HFI1_FROZEN flag is
3364          * "Device has gone into freeze mode and freeze mode handling
3365          * is still in progress."
3366          *
3367          * The flag will be removed when freeze mode processing has
3368          * completed.
3369          */
3370         dd->flags &= ~HFI1_FROZEN;
3371         wake_up(&dd->event_queue);
3372
3373         /* no longer frozen */
3374         dd_dev_err(dd, "Exiting SPC freeze\n");
3375 }
3376
3377 /*
3378  * Handle a link up interrupt from the 8051.
3379  *
3380  * This is a work-queue function outside of the interrupt.
3381  */
3382 void handle_link_up(struct work_struct *work)
3383 {
3384         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3385                                                                 link_up_work);
3386         set_link_state(ppd, HLS_UP_INIT);
3387
3388         /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
3389         read_ltp_rtt(ppd->dd);
3390         /*
3391          * OPA specifies that certain counters are cleared on a transition
3392          * to link up, so do that.
3393          */
3394         clear_linkup_counters(ppd->dd);
3395         /*
3396          * And (re)set link up default values.
3397          */
3398         set_linkup_defaults(ppd);
3399
3400         /* enforce link speed enabled */
3401         if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
3402                 /* oops - current speed is not enabled, bounce */
3403                 dd_dev_err(ppd->dd,
3404                         "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
3405                         ppd->link_speed_active, ppd->link_speed_enabled);
3406                 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
3407                         OPA_LINKDOWN_REASON_SPEED_POLICY);
3408                 set_link_state(ppd, HLS_DN_OFFLINE);
3409                 start_link(ppd);
3410         }
3411 }
3412
3413 /* Several pieces of LNI information were cached for SMA in ppd.
3414  * Reset these on link down */
3415 static void reset_neighbor_info(struct hfi1_pportdata *ppd)
3416 {
3417         ppd->neighbor_guid = 0;
3418         ppd->neighbor_port_number = 0;
3419         ppd->neighbor_type = 0;
3420         ppd->neighbor_fm_security = 0;
3421 }
3422
3423 /*
3424  * Handle a link down interrupt from the 8051.
3425  *
3426  * This is a work-queue function outside of the interrupt.
3427  */
3428 void handle_link_down(struct work_struct *work)
3429 {
3430         u8 lcl_reason, neigh_reason = 0;
3431         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3432                                                                 link_down_work);
3433
3434         /* go offline first, then deal with reasons */
3435         set_link_state(ppd, HLS_DN_OFFLINE);
3436
3437         lcl_reason = 0;
3438         read_planned_down_reason_code(ppd->dd, &neigh_reason);
3439
3440         /*
3441          * If no reason, assume peer-initiated but missed
3442          * LinkGoingDown idle flits.
3443          */
3444         if (neigh_reason == 0)
3445                 lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
3446
3447         set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
3448
3449         reset_neighbor_info(ppd);
3450
3451         /* disable the port */
3452         clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3453
3454         /* If there is no cable attached, turn the DC off. Otherwise,
3455          * start the link bring-up. */
3456         if (!qsfp_mod_present(ppd))
3457                 dc_shutdown(ppd->dd);
3458         else
3459                 start_link(ppd);
3460 }
3461
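/*
 * Handle a link bounce request: if the link is currently up, take it
 * offline and restart it.  This is a work-queue function.
 */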
3462 void handle_link_bounce(struct work_struct *work)
3463 {
3464         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3465                                                         link_bounce_work);
3466
3467         /*
3468          * Only do something if the link is currently up.
3469          */
3470         if (ppd->host_link_state & HLS_UP) {
3471                 set_link_state(ppd, HLS_DN_OFFLINE);
3472                 start_link(ppd);
3473         } else {
3474                 dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
3475                         __func__, link_state_name(ppd->host_link_state));
3476         }
3477 }
3478
3479 /*
3480  * Mask conversion: Capability exchange to Port LTP.  The capability
3481  * exchange has an implicit 16b CRC that is mandatory.
3482  */
3483 static int cap_to_port_ltp(int cap)
3484 {
3485         int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */
3486
3487         if (cap & CAP_CRC_14B)
3488                 port_ltp |= PORT_LTP_CRC_MODE_14;
3489         if (cap & CAP_CRC_48B)
3490                 port_ltp |= PORT_LTP_CRC_MODE_48;
3491         if (cap & CAP_CRC_12B_16B_PER_LANE)
3492                 port_ltp |= PORT_LTP_CRC_MODE_PER_LANE;
3493
3494         return port_ltp;
3495 }
3496
3497 /*
3498  * Convert an OPA Port LTP mask to capability mask
3499  */
3500 int port_ltp_to_cap(int port_ltp)
3501 {
3502         int cap_mask = 0;
3503
3504         if (port_ltp & PORT_LTP_CRC_MODE_14)
3505                 cap_mask |= CAP_CRC_14B;
3506         if (port_ltp & PORT_LTP_CRC_MODE_48)
3507                 cap_mask |= CAP_CRC_48B;
3508         if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE)
3509                 cap_mask |= CAP_CRC_12B_16B_PER_LANE;
3510
3511         return cap_mask;
3512 }
3513
3514 /*
3515  * Convert a single DC LCB CRC mode to an OPA Port LTP mask.
3516  */
3517 static int lcb_to_port_ltp(int lcb_crc)
3518 {
3519         int port_ltp = 0;
3520
3521         if (lcb_crc == LCB_CRC_12B_16B_PER_LANE)
3522                 port_ltp = PORT_LTP_CRC_MODE_PER_LANE;
3523         else if (lcb_crc == LCB_CRC_48B)
3524                 port_ltp = PORT_LTP_CRC_MODE_48;
3525         else if (lcb_crc == LCB_CRC_14B)
3526                 port_ltp = PORT_LTP_CRC_MODE_14;
3527         else
3528                 port_ltp = PORT_LTP_CRC_MODE_16;
3529
3530         return port_ltp;
3531 }
3532
3533 /*
3534  * Our neighbor has indicated that we are allowed to act as a fabric
3535  * manager, so place the full management partition key in the second
3536  * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note
3537  * that we should already have the limited management partition key in
3538  * array element 1, and also that the port is not yet up when
3539  * add_full_mgmt_pkey() is invoked.
3540  */
3541 static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
3542 {
3543         struct hfi1_devdata *dd = ppd->dd;
3544
3545         /* Sanity check - ppd->pkeys[2] should be 0 */
3546         if (ppd->pkeys[2] != 0)
3547                 dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
3548                            __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
3549         ppd->pkeys[2] = FULL_MGMT_P_KEY;
3550         (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
3551 }
3552
3553 /*
3554  * Convert the given link width to the OPA link width bitmask.
3555  */
3556 static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
3557 {
3558         switch (width) {
3559         case 0:
3560                 /*
3561                  * Simulator and quick linkup do not set the width.
3562                  * Just set it to 4x without complaint.
3563                  */
3564                 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup)
3565                         return OPA_LINK_WIDTH_4X;
3566                 return 0; /* no lanes up */
3567         case 1: return OPA_LINK_WIDTH_1X;
3568         case 2: return OPA_LINK_WIDTH_2X;
3569         case 3: return OPA_LINK_WIDTH_3X;
3570         default:
3571                 dd_dev_info(dd, "%s: invalid width %d, using 4\n",
3572                         __func__, width);
3573                 /* fall through */
3574         case 4: return OPA_LINK_WIDTH_4X;
3575         }
3576 }
3577
3578 /*
3579  * Do a population count on the bottom nibble.
3580  */
3581 static const u8 bit_counts[16] = {
3582         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
3583 };
3584 static inline u8 nibble_to_count(u8 nibble)
3585 {
3586         return bit_counts[nibble & 0xf];
3587 }
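/* e.g. nibble_to_count(0xb) == 3: binary 1011 has three bits set */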
3588
3589 /*
3590  * Read the active lane information from the 8051 registers and return
3591  * their widths.
3592  *
3593  * Active lane information is found in these 8051 registers:
3594  *      enable_lane_tx
3595  *      enable_lane_rx
3596  */
3597 static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
3598                             u16 *rx_width)
3599 {
3600         u16 tx, rx;
3601         u8 enable_lane_rx;
3602         u8 enable_lane_tx;
3603         u8 tx_polarity_inversion;
3604         u8 rx_polarity_inversion;
3605         u8 max_rate;
3606
3607         /* read the active lanes */
3608         read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
3609                                 &rx_polarity_inversion, &max_rate);
3610         read_local_lni(dd, &enable_lane_rx);
3611
3612         /* convert to counts */
3613         tx = nibble_to_count(enable_lane_tx);
3614         rx = nibble_to_count(enable_lane_rx);
3615
3616         /*
3617          * Set link_speed_active here, overriding what was set in
3618          * handle_verify_cap().  The ASIC 8051 firmware does not correctly
3619          * set the max_rate field in handle_verify_cap until v0.19.
3620          */
3621         if ((dd->icode == ICODE_RTL_SILICON)
3622                                 && (dd->dc8051_ver < dc8051_ver(0, 19))) {
3623                 /* max_rate: 0 = 12.5G, 1 = 25G */
3624                 switch (max_rate) {
3625                 case 0:
3626                         dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
3627                         break;
3628                 default:
3629                         dd_dev_err(dd,
3630                                 "%s: unexpected max rate %d, using 25Gb\n",
3631                                 __func__, (int)max_rate);
3632                         /* fall through */
3633                 case 1:
3634                         dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
3635                         break;
3636                 }
3637         }
3638
3639         dd_dev_info(dd,
3640                 "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
3641                 enable_lane_tx, tx, enable_lane_rx, rx);
3642         *tx_width = link_width_to_bits(dd, tx);
3643         *rx_width = link_width_to_bits(dd, rx);
3644 }
3645
3646 /*
3647  * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
3648  * Valid after the end of VerifyCap and during LinkUp.  Does not change
3649  * after link up.  I.e. look elsewhere for downgrade information.
3650  *
3651  * Bits are:
3652  *      + bits [7:4] contain the number of active transmitters
3653  *      + bits [3:0] contain the number of active receivers
3654  * These are numbers 1 through 4 and can be different values if the
3655  * link is asymmetric.
3656  *
3657  * verify_cap_local_fm_link_width[0] retains its original value.
3658  */
3659 static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
3660                               u16 *rx_width)
3661 {
3662         u16 widths, tx, rx;
3663         u8 misc_bits, local_flags;
3664         u16 active_tx, active_rx;
3665
3666         read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
3667         tx = widths >> 12;
3668         rx = (widths >> 8) & 0xf;
3669
3670         *tx_width = link_width_to_bits(dd, tx);
3671         *rx_width = link_width_to_bits(dd, rx);
3672
3673         /* print the active widths */
3674         get_link_widths(dd, &active_tx, &active_rx);
3675 }
3676
3677 /*
3678  * Set ppd->link_width_active and ppd->link_width_downgrade_active using
3679  * hardware information when the link first comes up.
3680  *
3681  * The link width is not available until after VerifyCap.AllFramesReceived
3682  * (the trigger for handle_verify_cap), so this is outside that routine
3683  * and should be called when the 8051 signals linkup.
3684  */
3685 void get_linkup_link_widths(struct hfi1_pportdata *ppd)
3686 {
3687         u16 tx_width, rx_width;
3688
3689         /* get end-of-LNI link widths */
3690         get_linkup_widths(ppd->dd, &tx_width, &rx_width);
3691
3692         /* use tx_width as the link is supposed to be symmetric on link up */
3693         ppd->link_width_active = tx_width;
3694         /* link width downgrade active (LWD.A) starts out matching LW.A */
3695         ppd->link_width_downgrade_tx_active = ppd->link_width_active;
3696         ppd->link_width_downgrade_rx_active = ppd->link_width_active;
3697         /* per OPA spec, on link up LWD.E resets to LWD.S */
3698         ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported;
3699         /* cache the active egress rate (units [10^6 bits/sec]) */
3700         ppd->current_egress_rate = active_egress_rate(ppd);
3701 }
3702
3703 /*
3704  * Handle a verify capabilities interrupt from the 8051.
3705  *
3706  * This is a work-queue function outside of the interrupt.
3707  */
3708 void handle_verify_cap(struct work_struct *work)
3709 {
3710         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3711                                                                 link_vc_work);
3712         struct hfi1_devdata *dd = ppd->dd;
3713         u64 reg;
3714         u8 power_management;
3715         u8 continuous;
3716         u8 vcu;
3717         u8 vau;
3718         u8 z;
3719         u16 vl15buf;
3720         u16 link_widths;
3721         u16 crc_mask;
3722         u16 crc_val;
3723         u16 device_id;
3724         u16 active_tx, active_rx;
3725         u8 partner_supported_crc;
3726         u8 remote_tx_rate;
3727         u8 device_rev;
3728
3729         set_link_state(ppd, HLS_VERIFY_CAP);
3730
3731         lcb_shutdown(dd, 0);
3732         adjust_lcb_for_fpga_serdes(dd);
3733
3734         /*
3735          * These are now valid:
3736          *      remote VerifyCap fields in the general LNI config
3737          *      CSR DC8051_STS_REMOTE_GUID
3738          *      CSR DC8051_STS_REMOTE_NODE_TYPE
3739          *      CSR DC8051_STS_REMOTE_FM_SECURITY
3740          *      CSR DC8051_STS_REMOTE_PORT_NO
3741          */
3742
3743         read_vc_remote_phy(dd, &power_management, &continuous);
3744         read_vc_remote_fabric(
3745                 dd,
3746                 &vau,
3747                 &z,
3748                 &vcu,
3749                 &vl15buf,
3750                 &partner_supported_crc);
3751         read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
3752         read_remote_device_id(dd, &device_id, &device_rev);
3753         /*
3754          * And the 'MgmtAllowed' information, which is exchanged during
3755          * LNI, is also available at this point.
3756          */
3757         read_mgmt_allowed(dd, &ppd->mgmt_allowed);
3758         /* print the active widths */
3759         get_link_widths(dd, &active_tx, &active_rx);
3760         dd_dev_info(dd,
3761                 "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
3762                 (int)power_management, (int)continuous);
3763         dd_dev_info(dd,
3764                 "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
3765                 (int)vau,
3766                 (int)z,
3767                 (int)vcu,
3768                 (int)vl15buf,
3769                 (int)partner_supported_crc);
3770         dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
3771                 (u32)remote_tx_rate, (u32)link_widths);
3772         dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
3773                 (u32)device_id, (u32)device_rev);
3774         /*
3775          * The peer vAU value just read is the peer receiver value.  HFI does
3776          * not support a transmit vAU of 0 (AU == 8).  We advertised that
3777          * with Z=1 in the fabric capabilities sent to the peer.  The peer
3778          * will see our Z=1, and, if it advertised a vAU of 0, will move its
3779          * receive to vAU of 1 (AU == 16).  Do the same here.  We do not care
3780          * about the peer Z value - our sent vAU is 3 (hardwired) and is not
3781          * subject to the Z value exception.
3782          */
3783         if (vau == 0)
3784                 vau = 1;
3785         set_up_vl15(dd, vau, vl15buf);
3786
3787         /* set up the LCB CRC mode */
3788         crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
3789
3790         /* order is important: use the lowest bit in common */
3791         if (crc_mask & CAP_CRC_14B)
3792                 crc_val = LCB_CRC_14B;
3793         else if (crc_mask & CAP_CRC_48B)
3794                 crc_val = LCB_CRC_48B;
3795         else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
3796                 crc_val = LCB_CRC_12B_16B_PER_LANE;
3797         else
3798                 crc_val = LCB_CRC_16B;
3799
3800         dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
3801         write_csr(dd, DC_LCB_CFG_CRC_MODE,
3802                   (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);
3803
3804         /* set (14b only) or clear sideband credit */
3805         reg = read_csr(dd, SEND_CM_CTRL);
3806         if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
3807                 write_csr(dd, SEND_CM_CTRL,
3808                         reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3809         } else {
3810                 write_csr(dd, SEND_CM_CTRL,
3811                         reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3812         }
3813
3814         ppd->link_speed_active = 0;     /* invalid value */
3815         if (dd->dc8051_ver < dc8051_ver(0, 20)) {
3816                 /* remote_tx_rate: 0 = 12.5G, 1 = 25G */
3817                 switch (remote_tx_rate) {
3818                 case 0:
3819                         ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3820                         break;
3821                 case 1:
3822                         ppd->link_speed_active = OPA_LINK_SPEED_25G;
3823                         break;
3824                 }
3825         } else {
3826                 /* actual rate is highest bit of the ANDed rates */
3827                 u8 rate = remote_tx_rate & ppd->local_tx_rate;
3828
3829                 if (rate & 2)
3830                         ppd->link_speed_active = OPA_LINK_SPEED_25G;
3831                 else if (rate & 1)
3832                         ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3833         }
3834         if (ppd->link_speed_active == 0) {
3835                 dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
3836                         __func__, (int)remote_tx_rate);
3837                 ppd->link_speed_active = OPA_LINK_SPEED_25G;
3838         }
3839
3840         /*
3841          * Cache the values of the supported, enabled, and active
3842          * LTP CRC modes to return in 'portinfo' queries. But the bit
3843          * flags that are returned in the portinfo query differ from
3844          * what's in the link_crc_mask, crc_sizes, and crc_val
3845          * variables. Convert these here.
3846          */
3847         ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
3848                 /* supported crc modes */
3849         ppd->port_ltp_crc_mode |=
3850                 cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
3851                 /* enabled crc modes */
3852         ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
3853                 /* active crc mode */
3854
3855         /* set up the remote credit return table */
3856         assign_remote_cm_au_table(dd, vcu);
3857
3858         /*
3859          * The LCB is reset on entry to handle_verify_cap(), so this must
3860          * be applied on every link up.
3861          *
3862          * Adjust LCB error kill enable to kill the link if
3863          * these RBUF errors are seen:
3864          *      REPLAY_BUF_MBE_SMASK
3865          *      FLIT_INPUT_BUF_MBE_SMASK
3866          */
3867         if (is_a0(dd)) {                        /* fixed in B0 */
3868                 reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
3869                 reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
3870                         | DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
3871                 write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
3872         }
3873
3874         /* pull LCB fifos out of reset - all fifo clocks must be stable */
3875         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
3876
3877         /* give 8051 access to the LCB CSRs */
3878         write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
3879         set_8051_lcb_access(dd);
3880
3881         ppd->neighbor_guid =
3882                 read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
3883         ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
3884                                         DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
3885         ppd->neighbor_type =
3886                 read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
3887                 DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
3888         ppd->neighbor_fm_security =
3889                 read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
3890                 DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
3891         dd_dev_info(dd,
3892                 "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
3893                 ppd->neighbor_guid, ppd->neighbor_type,
3894                 ppd->mgmt_allowed, ppd->neighbor_fm_security);
3895         if (ppd->mgmt_allowed)
3896                 add_full_mgmt_pkey(ppd);
3897
3898         /* tell the 8051 to go to LinkUp */
3899         set_link_state(ppd, HLS_GOING_UP);
3900 }
3901
3902 /*
3903  * Apply the link width downgrade enabled policy against the current active
3904  * link widths.
3905  *
3906  * Called when the enabled policy changes or the active link widths change.
3907  */
3908 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
3909 {
3910         int skip = 1;
3911         int do_bounce = 0;
3912         u16 lwde = ppd->link_width_downgrade_enabled;
3913         u16 tx, rx;
3914
3915         mutex_lock(&ppd->hls_lock);
3916         /* only apply if the link is up */
3917         if (ppd->host_link_state & HLS_UP)
3918                 skip = 0;
3919         mutex_unlock(&ppd->hls_lock);
3920         if (skip)
3921                 return;
3922
3923         if (refresh_widths) {
3924                 get_link_widths(ppd->dd, &tx, &rx);
3925                 ppd->link_width_downgrade_tx_active = tx;
3926                 ppd->link_width_downgrade_rx_active = rx;
3927         }
3928
3929         if (lwde == 0) {
3930                 /* downgrade is disabled */
3931
3932                 /* bounce if not at starting active width */
3933                 if ((ppd->link_width_active !=
3934                                         ppd->link_width_downgrade_tx_active)
3935                                 || (ppd->link_width_active !=
3936                                         ppd->link_width_downgrade_rx_active)) {
3937                         dd_dev_err(ppd->dd,
3938                                 "Link downgrade is disabled and link has downgraded, downing link\n");
3939                         dd_dev_err(ppd->dd,
3940                                 "  original 0x%x, tx active 0x%x, rx active 0x%x\n",
3941                                 ppd->link_width_active,
3942                                 ppd->link_width_downgrade_tx_active,
3943                                 ppd->link_width_downgrade_rx_active);
3944                         do_bounce = 1;
3945                 }
3946         } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
3947                 || (lwde & ppd->link_width_downgrade_rx_active) == 0) {
3948                 /* Tx or Rx is outside the enabled policy */
3949                 dd_dev_err(ppd->dd,
3950                         "Link is outside of downgrade allowed, downing link\n");
3951                 dd_dev_err(ppd->dd,
3952                         "  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
3953                         lwde,
3954                         ppd->link_width_downgrade_tx_active,
3955                         ppd->link_width_downgrade_rx_active);
3956                 do_bounce = 1;
3957         }
3958
3959         if (do_bounce) {
3960                 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
3961                   OPA_LINKDOWN_REASON_WIDTH_POLICY);
3962                 set_link_state(ppd, HLS_DN_OFFLINE);
3963                 start_link(ppd);
3964         }
3965 }
3966
3967 /*
3968  * Handle a link downgrade interrupt from the 8051.
3969  *
3970  * This is a work-queue function outside of the interrupt.
3971  */
3972 void handle_link_downgrade(struct work_struct *work)
3973 {
3974         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3975                                                         link_downgrade_work);
3976
3977         dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
3978         apply_link_downgrade_policy(ppd, 1);
3979 }
3980
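/*
 * Decode helpers: format a set of DC error/status flag bits as a
 * human-readable string using the matching flag name table.
 */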
3981 static char *dcc_err_string(char *buf, int buf_len, u64 flags)
3982 {
3983         return flag_string(buf, buf_len, flags, dcc_err_flags,
3984                 ARRAY_SIZE(dcc_err_flags));
3985 }
3986
3987 static char *lcb_err_string(char *buf, int buf_len, u64 flags)
3988 {
3989         return flag_string(buf, buf_len, flags, lcb_err_flags,
3990                 ARRAY_SIZE(lcb_err_flags));
3991 }
3992
3993 static char *dc8051_err_string(char *buf, int buf_len, u64 flags)
3994 {
3995         return flag_string(buf, buf_len, flags, dc8051_err_flags,
3996                 ARRAY_SIZE(dc8051_err_flags));
3997 }
3998
3999 static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags)
4000 {
4001         return flag_string(buf, buf_len, flags, dc8051_info_err_flags,
4002                 ARRAY_SIZE(dc8051_info_err_flags));
4003 }
4004
4005 static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags)
4006 {
4007         return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags,
4008                 ARRAY_SIZE(dc8051_info_host_msg_flags));
4009 }
4010
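/*
 * Handle an 8051 (DC8051) error interrupt: decode the error and host
 * message bits reported by the firmware, log them, and queue link up,
 * link down, SMA message, verify capability, or downgrade work as
 * needed.  A lost 8051 heartbeat disables that interrupt so it is
 * only reported once.
 */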
4011 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
4012 {
4013         struct hfi1_pportdata *ppd = dd->pport;
4014         u64 info, err, host_msg;
4015         int queue_link_down = 0;
4016         char buf[96];
4017
4018         /* look at the flags */
4019         if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) {
4020                 /* 8051 information set by firmware */
4021                 /* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */
4022                 info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051);
4023                 err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT)
4024                         & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK;
4025                 host_msg = (info >>
4026                         DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT)
4027                         & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK;
4028
4029                 /*
4030                  * Handle error flags.
4031                  */
4032                 if (err & FAILED_LNI) {
4033                         /*
4034                          * LNI error indications are cleared by the 8051
4035                          * only when starting polling.  Only pay attention
4036                          * to them when in the states that occur during
4037                          * LNI.
4038                          */
4039                         if (ppd->host_link_state
4040                             & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
4041                                 queue_link_down = 1;
4042                                 dd_dev_info(dd, "Link error: %s\n",
4043                                         dc8051_info_err_string(buf,
4044                                                 sizeof(buf),
4045                                                 err & FAILED_LNI));
4046                         }
4047                         err &= ~(u64)FAILED_LNI;
4048                 }
4049                 if (err) {
4050                         /* report remaining errors, but do not do anything */
4051                         dd_dev_err(dd, "8051 info error: %s\n",
4052                                 dc8051_info_err_string(buf, sizeof(buf), err));
4053                 }
4054
4055                 /*
4056                  * Handle host message flags.
4057                  */
4058                 if (host_msg & HOST_REQ_DONE) {
4059                         /*
4060                          * Presently, the driver does a busy wait for
4061                          * host requests to complete.  This is only an
4062                          * informational message.
4063                          * NOTE: The 8051 clears the host message
4064                          * information *on the next 8051 command*.
4065                          * Therefore, when linkup is achieved,
4066                          * this flag will still be set.
4067                          */
4068                         host_msg &= ~(u64)HOST_REQ_DONE;
4069                 }
4070                 if (host_msg & BC_SMA_MSG) {
4071                         queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
4072                         host_msg &= ~(u64)BC_SMA_MSG;
4073                 }
4074                 if (host_msg & LINKUP_ACHIEVED) {
4075                         dd_dev_info(dd, "8051: Link up\n");
4076                         queue_work(ppd->hfi1_wq, &ppd->link_up_work);
4077                         host_msg &= ~(u64)LINKUP_ACHIEVED;
4078                 }
4079                 if (host_msg & EXT_DEVICE_CFG_REQ) {
4080                         handle_8051_request(dd);
4081                         host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
4082                 }
4083                 if (host_msg & VERIFY_CAP_FRAME) {
4084                         queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
4085                         host_msg &= ~(u64)VERIFY_CAP_FRAME;
4086                 }
4087                 if (host_msg & LINK_GOING_DOWN) {
4088                         const char *extra = "";
4089                         /* no downgrade action needed if going down */
4090                         if (host_msg & LINK_WIDTH_DOWNGRADED) {
4091                                 host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4092                                 extra = " (ignoring downgrade)";
4093                         }
4094                         dd_dev_info(dd, "8051: Link down%s\n", extra);
4095                         queue_link_down = 1;
4096                         host_msg &= ~(u64)LINK_GOING_DOWN;
4097                 }
4098                 if (host_msg & LINK_WIDTH_DOWNGRADED) {
4099                         queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
4100                         host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4101                 }
4102                 if (host_msg) {
4103                         /* report remaining messages, but do not do anything */
4104                         dd_dev_info(dd, "8051 info host message: %s\n",
4105                                 dc8051_info_host_msg_string(buf, sizeof(buf),
4106                                         host_msg));
4107                 }
4108
4109                 reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
4110         }
4111         if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) {
4112                 /*
4113                  * Lost the 8051 heartbeat.  If this happens, we
4114                  * receive constant interrupts about it.  Disable
4115                  * the interrupt after the first.
4116                  */
4117                 dd_dev_err(dd, "Lost 8051 heartbeat\n");
4118                 write_csr(dd, DC_DC8051_ERR_EN,
4119                         read_csr(dd, DC_DC8051_ERR_EN)
4120                           & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
4121
4122                 reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
4123         }
4124         if (reg) {
4125                 /* report the error, but do not do anything */
4126                 dd_dev_err(dd, "8051 error: %s\n",
4127                         dc8051_err_string(buf, sizeof(buf), reg));
4128         }
4129
4130         if (queue_link_down) {
4131                 /* if the link is already going down or disabled, do not
4132                  * queue another */
4133                 if ((ppd->host_link_state
4134                                     & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
4135                                 || ppd->link_enabled == 0) {
4136                         dd_dev_info(dd, "%s: not queuing link down\n",
4137                                 __func__);
4138                 } else {
4139                         queue_work(ppd->hfi1_wq, &ppd->link_down_work);
4140                 }
4141         }
4142 }
4143
4144 static const char * const fm_config_txt[] = {
4145 [0] =
4146         "BadHeadDist: Distance violation between two head flits",
4147 [1] =
4148         "BadTailDist: Distance violation between two tail flits",
4149 [2] =
4150         "BadCtrlDist: Distance violation between two credit control flits",
4151 [3] =
4152         "BadCrdAck: Credits return for unsupported VL",
4153 [4] =
4154         "UnsupportedVLMarker: Received VL Marker",
4155 [5] =
4156         "BadPreempt: Exceeded the preemption nesting level",
4157 [6] =
4158         "BadControlFlit: Received unsupported control flit",
4159 /* no 7 */
4160 [8] =
4161         "UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL",
4162 };
4163
4164 static const char * const port_rcv_txt[] = {
4165 [1] =
4166         "BadPktLen: Illegal PktLen",
4167 [2] =
4168         "PktLenTooLong: Packet longer than PktLen",
4169 [3] =
4170         "PktLenTooShort: Packet shorter than PktLen",
4171 [4] =
4172         "BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)",
4173 [5] =
4174         "BadDLID: Illegal DLID (0, doesn't match HFI)",
4175 [6] =
4176         "BadL2: Illegal L2 opcode",
4177 [7] =
4178         "BadSC: Unsupported SC",
4179 [9] =
4180         "BadRC: Illegal RC",
4181 [11] =
4182         "PreemptError: Preempting with same VL",
4183 [12] =
4184         "PreemptVL15: Preempting a VL15 packet",
4185 };
4186
4187 #define OPA_LDR_FMCONFIG_OFFSET 16
4188 #define OPA_LDR_PORTRCV_OFFSET 0
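/*
 * Handle a DCC error interrupt: latch the first error's details for
 * later error-info queries, log what happened, and bounce the link
 * when the PortErrorAction policy requests it.
 */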
4189 static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4190 {
4191         u64 info, hdr0, hdr1;
4192         const char *extra;
4193         char buf[96];
4194         struct hfi1_pportdata *ppd = dd->pport;
4195         u8 lcl_reason = 0;
4196         int do_bounce = 0;
4197
4198         if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) {
4199                 if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) {
4200                         info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE);
4201                         dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK;
4202                         /* set status bit */
4203                         dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK;
4204                 }
4205                 reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK;
4206         }
4207
4208         if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) {
4209                 struct hfi1_pportdata *ppd = dd->pport;
4210                 /* this counter saturates at (2^32) - 1 */
4211                 if (ppd->link_downed < (u32)UINT_MAX)
4212                         ppd->link_downed++;
4213                 reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK;
4214         }
4215
4216         if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) {
4217                 u8 reason_valid = 1;
4218
4219                 info = read_csr(dd, DCC_ERR_INFO_FMCONFIG);
4220                 if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) {
4221                         dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK;
4222                         /* set status bit */
4223                         dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK;
4224                 }
4225                 switch (info) {
4226                 case 0:
4227                 case 1:
4228                 case 2:
4229                 case 3:
4230                 case 4:
4231                 case 5:
4232                 case 6:
4233                         extra = fm_config_txt[info];
4234                         break;
4235                 case 8:
4236                         extra = fm_config_txt[info];
4237                         if (ppd->port_error_action &
4238                             OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) {
4239                                 do_bounce = 1;
4240                                 /*
4241                                  * lcl_reason cannot be derived from info
4242                                  * for this error
4243                                  */
4244                                 lcl_reason =
4245                                   OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER;
4246                         }
4247                         break;
4248                 default:
4249                         reason_valid = 0;
4250                         snprintf(buf, sizeof(buf), "reserved%lld", info);
4251                         extra = buf;
4252                         break;
4253                 }
4254
4255                 if (reason_valid && !do_bounce) {
4256                         do_bounce = ppd->port_error_action &
4257                                         (1 << (OPA_LDR_FMCONFIG_OFFSET + info));
4258                         lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST;
4259                 }
4260
4261                 /* just report this */
4262                 dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
4263                 reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
4264         }
4265
4266         if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) {
4267                 u8 reason_valid = 1;
4268
4269                 info = read_csr(dd, DCC_ERR_INFO_PORTRCV);
4270                 hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0);
4271                 hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1);
4272                 if (!(dd->err_info_rcvport.status_and_code &
4273                       OPA_EI_STATUS_SMASK)) {
4274                         dd->err_info_rcvport.status_and_code =
4275                                 info & OPA_EI_CODE_SMASK;
4276                         /* set status bit */
4277                         dd->err_info_rcvport.status_and_code |=
4278                                 OPA_EI_STATUS_SMASK;
4279                         /* save first 2 flits in the packet that caused
4280                          * the error */
4281                         dd->err_info_rcvport.packet_flit1 = hdr0;
4282                         dd->err_info_rcvport.packet_flit2 = hdr1;
4283                 }
4284                 switch (info) {
4285                 case 1:
4286                 case 2:
4287                 case 3:
4288                 case 4:
4289                 case 5:
4290                 case 6:
4291                 case 7:
4292                 case 9:
4293                 case 11:
4294                 case 12:
4295                         extra = port_rcv_txt[info];
4296                         break;
4297                 default:
4298                         reason_valid = 0;
4299                         snprintf(buf, sizeof(buf), "reserved%lld", info);
4300                         extra = buf;
4301                         break;
4302                 }
4303
4304                 if (reason_valid && !do_bounce) {
4305                         do_bounce = ppd->port_error_action &
4306                                         (1 << (OPA_LDR_PORTRCV_OFFSET + info));
4307                         lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0;
4308                 }
4309
4310                 /* just report this */
4311                 dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
4312                 dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
4313                         hdr0, hdr1);
4314
4315                 reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
4316         }
4317
4318         if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
4319                 /* informative only */
4320                 dd_dev_info(dd, "8051 access to LCB blocked\n");
4321                 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
4322         }
4323         if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
4324                 /* informative only */
4325                 dd_dev_info(dd, "host access to LCB blocked\n");
4326                 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
4327         }
4328
4329         /* report any remaining errors */
4330         if (reg)
4331                 dd_dev_info(dd, "DCC Error: %s\n",
4332                         dcc_err_string(buf, sizeof(buf), reg));
4333
4334         if (lcl_reason == 0)
4335                 lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
4336
4337         if (do_bounce) {
4338                 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
4339                 set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
4340                 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
4341         }
4342 }
4343
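/* LCB errors are only reported here; no recovery action is taken. */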
4344 static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4345 {
4346         char buf[96];
4347
4348         dd_dev_info(dd, "LCB Error: %s\n",
4349                 lcb_err_string(buf, sizeof(buf), reg));
4350 }
4351
4352 /*
4353  * CCE block DC interrupt.  Source is < 8.
4354  */
4355 static void is_dc_int(struct hfi1_devdata *dd, unsigned int source)
4356 {
4357         const struct err_reg_info *eri = &dc_errs[source];
4358
4359         if (eri->handler) {
4360                 interrupt_clear_down(dd, 0, eri);
4361         } else if (source == 3 /* dc_lbm_int */) {
4362                 /*
4363                  * This indicates that a parity error has occurred on the
4364                  * address/control lines presented to the LBM.  The error
4365                  * is a single pulse, there is no associated error flag,
4366                  * and it is non-maskable.  This is because if a parity
4367                  * error occurs on the request the request is dropped.
4368                  * This should never occur, but it is nice to know if it
4369                  * ever does.
4370                  */
4371                 dd_dev_err(dd, "Parity error in DC LBM block\n");
4372         } else {
4373                 dd_dev_err(dd, "Invalid DC interrupt %u\n", source);
4374         }
4375 }
4376
4377 /*
4378  * TX block send credit interrupt.  Source is < 160.
4379  */
4380 static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source)
4381 {
4382         sc_group_release_update(dd, source);
4383 }
4384
4385 /*
4386  * TX block SDMA interrupt.  Source is < 48.
4387  *
4388  * SDMA interrupts are grouped by type:
4389  *
4390  *       0 -  N-1 = SDma
4391  *       N - 2N-1 = SDmaProgress
4392  *      2N - 3N-1 = SDmaIdle
4393  */
4394 static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
4395 {
4396         /* what interrupt */
4397         unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
4398         /* which engine */
4399         unsigned int which = source % TXE_NUM_SDMA_ENGINES;
4400
4401 #ifdef CONFIG_SDMA_VERBOSITY
4402         dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which,
4403                    slashstrip(__FILE__), __LINE__, __func__);
4404         sdma_dumpstate(&dd->per_sdma[which]);
4405 #endif
4406
4407         if (likely(what < 3 && which < dd->num_sdma)) {
4408                 sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source);
4409         } else {
4410                 /* should not happen */
4411                 dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source);
4412         }
4413 }
4414
4415 /*
4416  * RX block receive available interrupt.  Source is < 160.
4417  */
4418 static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
4419 {
4420         struct hfi1_ctxtdata *rcd;
4421         char *err_detail;
4422
4423         if (likely(source < dd->num_rcv_contexts)) {
4424                 rcd = dd->rcd[source];
4425                 if (rcd) {
4426                         if (source < dd->first_user_ctxt)
4427                                 rcd->do_interrupt(rcd, 0);
4428                         else
4429                                 handle_user_interrupt(rcd);
4430                         return; /* OK */
4431                 }
4432                 /* received an interrupt, but no rcd */
4433                 err_detail = "dataless";
4434         } else {
4435                 /* received an interrupt, but are not using that context */
4436                 err_detail = "out of range";
4437         }
4438         dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
4439                 err_detail, source);
4440 }
4441
4442 /*
4443  * RX block receive urgent interrupt.  Source is < 160.
4444  */
4445 static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
4446 {
4447         struct hfi1_ctxtdata *rcd;
4448         char *err_detail;
4449
4450         if (likely(source < dd->num_rcv_contexts)) {
4451                 rcd = dd->rcd[source];
4452                 if (rcd) {
4453                         /* only pay attention to user urgent interrupts */
4454                         if (source >= dd->first_user_ctxt)
4455                                 handle_user_interrupt(rcd);
4456                         return; /* OK */
4457                 }
4458                 /* received an interrupt, but no rcd */
4459                 err_detail = "dataless";
4460         } else {
4461                 /* received an interrupt, but are not using that context */
4462                 err_detail = "out of range";
4463         }
4464         dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
4465                 err_detail, source);
4466 }
4467
4468 /*
4469  * Reserved range interrupt.  Should not be called in normal operation.
4470  */
4471 static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
4472 {
4473         char name[64];
4474
4475         dd_dev_err(dd, "unexpected %s interrupt\n",
4476                                 is_reserved_name(name, sizeof(name), source));
4477 }
4478
4479 static const struct is_table is_table[] = {
4480 /* start                     end
4481                                 name func               interrupt func */
4482 { IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
4483                                 is_misc_err_name,       is_misc_err_int },
4484 { IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
4485                                 is_sdma_eng_err_name,   is_sdma_eng_err_int },
4486 { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
4487                                 is_sendctxt_err_name,   is_sendctxt_err_int },
4488 { IS_SDMA_START,             IS_SDMA_END,
4489                                 is_sdma_eng_name,       is_sdma_eng_int },
4490 { IS_VARIOUS_START,          IS_VARIOUS_END,
4491                                 is_various_name,        is_various_int },
4492 { IS_DC_START,       IS_DC_END,
4493                                 is_dc_name,             is_dc_int },
4494 { IS_RCVAVAIL_START,     IS_RCVAVAIL_END,
4495                                 is_rcv_avail_name,      is_rcv_avail_int },
4496 { IS_RCVURGENT_START,    IS_RCVURGENT_END,
4497                                 is_rcv_urgent_name,     is_rcv_urgent_int },
4498 { IS_SENDCREDIT_START,   IS_SENDCREDIT_END,
4499                                 is_send_credit_name,    is_send_credit_int},
4500 { IS_RESERVED_START,     IS_RESERVED_END,
4501                                 is_reserved_name,       is_reserved_int},
4502 };
4503
4504 /*
4505  * Interrupt source interrupt - called when the given source has an interrupt.
4506  * Source is a bit index into an array of 64-bit integers.
4507  */
4508 static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
4509 {
4510         const struct is_table *entry;
4511
4512         /* avoids a double compare by walking the table in-order */
4513         for (entry = &is_table[0]; entry->is_name; entry++) {
4514                 if (source < entry->end) {
4515                         trace_hfi1_interrupt(dd, entry, source);
4516                         entry->is_int(dd, source - entry->start);
4517                         return;
4518                 }
4519         }
4520         /* fell off the end */
4521         dd_dev_err(dd, "invalid interrupt source %u\n", source);
4522 }
4523
4524 /*
4525  * General interrupt handler.  This is able to correctly handle
4526  * all interrupts in case INTx is used.
4527  */
4528 static irqreturn_t general_interrupt(int irq, void *data)
4529 {
4530         struct hfi1_devdata *dd = data;
4531         u64 regs[CCE_NUM_INT_CSRS];
4532         u32 bit;
4533         int i;
4534
4535         this_cpu_inc(*dd->int_counter);
4536
4537         /* phase 1: scan and clear all handled interrupts */
4538         for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
4539                 if (dd->gi_mask[i] == 0) {
4540                         regs[i] = 0;    /* used later */
4541                         continue;
4542                 }
4543                 regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) &
4544                                 dd->gi_mask[i];
4545                 /* only clear if anything is set */
4546                 if (regs[i])
4547                         write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]);
4548         }
4549
4550         /* phase 2: call the appropriate handler */
4551         for_each_set_bit(bit, (unsigned long *)&regs[0],
4552                                                 CCE_NUM_INT_CSRS*64) {
4553                 is_interrupt(dd, bit);
4554         }
4555
4556         return IRQ_HANDLED;
4557 }
4558
4559 static irqreturn_t sdma_interrupt(int irq, void *data)
4560 {
4561         struct sdma_engine *sde = data;
4562         struct hfi1_devdata *dd = sde->dd;
4563         u64 status;
4564
4565 #ifdef CONFIG_SDMA_VERBOSITY
4566         dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
4567                    slashstrip(__FILE__), __LINE__, __func__);
4568         sdma_dumpstate(sde);
4569 #endif
4570
4571         this_cpu_inc(*dd->int_counter);
4572
4573         /* This read_csr is really bad in the hot path */
4574         status = read_csr(dd,
4575                         CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
4576                         & sde->imask;
4577         if (likely(status)) {
4578                 /* clear the interrupt(s) */
4579                 write_csr(dd,
4580                         CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
4581                         status);
4582
4583                 /* handle the interrupt(s) */
4584                 sdma_engine_interrupt(sde, status);
4585         } else
4586                 dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
4587                         sde->this_idx);
4588
4589         return IRQ_HANDLED;
4590 }
4591
4592 /*
4593  * Clear the receive interrupt, forcing the write and making sure
4594  * we have data from the chip, pushing everything in front of it
4595  * back to the host.
4596  */
4597 static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
4598 {
4599         struct hfi1_devdata *dd = rcd->dd;
4600         u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
4601
4602         mmiowb();       /* make sure everything before is written */
4603         write_csr(dd, addr, rcd->imask);
4604         /* force the above write on the chip and get a value back */
4605         (void)read_csr(dd, addr);
4606 }
4607
4608 /* force the receive interrupt */
4609 static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
4610 {
4611         write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
4612 }
4613
4614 /* return non-zero if a packet is present */
4615 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
4616 {
4617         if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
4618                 return (rcd->seq_cnt ==
4619                                 rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
4620
4621         /* else is RDMA rtail */
4622         return (rcd->head != get_rcvhdrtail(rcd));
4623 }
4624
4625 /*
4626  * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
4627  * This routine will try to handle packets immediately (latency), but if
4628  * it finds too many, it will invoke the thread handler (bandwidth).  The
4629  * chip receive interrupt is *not* cleared down until this or the thread (if
4630  * invoked) is finished.  The intent is to avoid extra interrupts while we
4631  * are processing packets anyway.
4632  */
4633 static irqreturn_t receive_context_interrupt(int irq, void *data)
4634 {
4635         struct hfi1_ctxtdata *rcd = data;
4636         struct hfi1_devdata *dd = rcd->dd;
4637         int disposition;
4638         int present;
4639
4640         trace_hfi1_receive_interrupt(dd, rcd->ctxt);
4641         this_cpu_inc(*dd->int_counter);
4642
4643         /* receive interrupt remains blocked while processing packets */
4644         disposition = rcd->do_interrupt(rcd, 0);
4645
4646         /*
4647          * Too many packets were seen while processing packets in this
4648          * IRQ handler.  Invoke the handler thread.  The receive interrupt
4649          * remains blocked.
4650          */
4651         if (disposition == RCV_PKT_LIMIT)
4652                 return IRQ_WAKE_THREAD;
4653
4654         /*
4655          * The packet processor detected no more packets.  Clear the receive
4656          * interrupt and recheck for a packet that may have arrived
4657          * after the previous check and interrupt clear.  If a packet arrived,
4658          * force another interrupt.
4659          */
4660         clear_recv_intr(rcd);
4661         present = check_packet_present(rcd);
4662         if (present)
4663                 force_recv_intr(rcd);
4664
4665         return IRQ_HANDLED;
4666 }
4667
4668 /*
4669  * Receive packet thread handler.  This expects to be invoked with the
4670  * receive interrupt still blocked.
4671  */
4672 static irqreturn_t receive_context_thread(int irq, void *data)
4673 {
4674         struct hfi1_ctxtdata *rcd = data;
4675         int present;
4676
4677         /* receive interrupt is still blocked from the IRQ handler */
4678         (void)rcd->do_interrupt(rcd, 1);
4679
4680         /*
4681          * The packet processor will only return if it detected no more
4682          * packets.  Hold IRQs here so we can safely clear the interrupt and
4683          * recheck for a packet that may have arrived after the previous
4684          * check and the interrupt clear.  If a packet arrived, force another
4685          * interrupt.
4686          */
4687         local_irq_disable();
4688         clear_recv_intr(rcd);
4689         present = check_packet_present(rcd);
4690         if (present)
4691                 force_recv_intr(rcd);
4692         local_irq_enable();
4693
4694         return IRQ_HANDLED;
4695 }
4696
4697 /* ========================================================================= */
4698
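/* Read the current physical link (port) state as reported by the 8051. */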
4699 u32 read_physical_state(struct hfi1_devdata *dd)
4700 {
4701         u64 reg;
4702
4703         reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
4704         return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
4705                                 & DC_DC8051_STS_CUR_STATE_PORT_MASK;
4706 }
4707
4708 static u32 read_logical_state(struct hfi1_devdata *dd)
4709 {
4710         u64 reg;
4711
4712         reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4713         return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
4714                                 & DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
4715 }
4716
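/*
 * Set the logical link state field of DCC_CFG_PORT_CONFIG, preserving
 * all other fields.
 */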
4717 static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
4718 {
4719         u64 reg;
4720
4721         reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4722         /* clear current state, set new state */
4723         reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
4724         reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
4725         write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
4726 }
4727
4728 /*
4729  * Use the 8051 to read a LCB CSR.
4730  */
4731 static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
4732 {
4733         u32 regno;
4734         int ret;
4735
4736         if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
4737                 if (acquire_lcb_access(dd, 0) == 0) {
4738                         *data = read_csr(dd, addr);
4739                         release_lcb_access(dd, 0);
4740                         return 0;
4741                 }
4742                 return -EBUSY;
4743         }
4744
4745         /* register is an index of LCB registers: (offset - base) / 8 */
4746         regno = (addr - DC_LCB_CFG_RUN) >> 3;
4747         ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data);
4748         if (ret != HCMD_SUCCESS)
4749                 return -EBUSY;
4750         return 0;
4751 }
4752
4753 /*
4754  * Read an LCB CSR.  Access may not be in host control, so check.
4755  * Return 0 on success, -EBUSY on failure.
4756  */
4757 int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
4758 {
4759         struct hfi1_pportdata *ppd = dd->pport;
4760
4761         /* if up, go through the 8051 for the value */
4762         if (ppd->host_link_state & HLS_UP)
4763                 return read_lcb_via_8051(dd, addr, data);
4764         /* if going up or down, no access */
4765         if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4766                 return -EBUSY;
4767         /* otherwise, host has access */
4768         *data = read_csr(dd, addr);
4769         return 0;
4770 }
4771
4772 /*
4773  * Use the 8051 to write a LCB CSR.
4774  */
4775 static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
4776 {
4777
4778         if (acquire_lcb_access(dd, 0) == 0) {
4779                 write_csr(dd, addr, data);
4780                 release_lcb_access(dd, 0);
4781                 return 0;
4782         }
4783         return -EBUSY;
4784 }
4785
4786 /*
4787  * Write an LCB CSR.  Access may not be in host control, so check.
4788  * Return 0 on success, -EBUSY on failure.
4789  */
4790 int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
4791 {
4792         struct hfi1_pportdata *ppd = dd->pport;
4793
4794         /* if up, go through the 8051 for the value */
4795         if (ppd->host_link_state & HLS_UP)
4796                 return write_lcb_via_8051(dd, addr, data);
4797         /* if going up or down, no access */
4798         if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4799                 return -EBUSY;
4800         /* otherwise, host has access */
4801         write_csr(dd, addr, data);
4802         return 0;
4803 }
4804
4805 /*
4806  * Returns:
4807  *      < 0 = Linux error, not able to get access
4808  *      > 0 = 8051 command RETURN_CODE
4809  */
4810 static int do_8051_command(
4811         struct hfi1_devdata *dd,
4812         u32 type,
4813         u64 in_data,
4814         u64 *out_data)
4815 {
4816         u64 reg, completed;
4817         int return_code;
4818         unsigned long flags;
4819         unsigned long timeout;
4820
4821         hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
4822
4823         /*
4824          * Alternative to holding the lock for a long time:
4825          * - keep busy wait - have other users bounce off
4826          */
4827         spin_lock_irqsave(&dd->dc8051_lock, flags);
4828
4829         /* We can't send any commands to the 8051 if it's in reset */
4830         if (dd->dc_shutdown) {
4831                 return_code = -ENODEV;
4832                 goto fail;
4833         }
4834
4835         /*
4836          * If an 8051 host command timed out previously, then the 8051 is
4837          * stuck.
4838          *
4839          * On first timeout, attempt to reset and restart the entire DC
4840          * block (including 8051). (Is this too big of a hammer?)
4841          *
4842          * If the 8051 times out a second time, the reset did not bring it
4843          * back to healthy life. In that case, fail any subsequent commands.
4844          */
4845         if (dd->dc8051_timed_out) {
4846                 if (dd->dc8051_timed_out > 1) {
4847                         dd_dev_err(dd,
4848                                    "Previous 8051 host command timed out, skipping command %u\n",
4849                                    type);
4850                         return_code = -ENXIO;
4851                         goto fail;
4852                 }
4853                 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4854                 dc_shutdown(dd);
4855                 dc_start(dd);
4856                 spin_lock_irqsave(&dd->dc8051_lock, flags);
4857         }
4858
4859         /*
4860          * If there is no timeout, then the 8051 command interface is
4861          * waiting for a command.
4862          */
4863
4864         /*
4865          * Do two writes: the first to stabilize the type and req_data, the
4866          * second to activate.
4867          */
4868         reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
4869                         << DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
4870                 | (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
4871                         << DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
4872         write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4873         reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK;
4874         write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4875
4876         /* wait for completion, alternate: interrupt */
4877         timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT);
4878         while (1) {
4879                 reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1);
4880                 completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK;
4881                 if (completed)
4882                         break;
4883                 if (time_after(jiffies, timeout)) {
4884                         dd->dc8051_timed_out++;
4885                         dd_dev_err(dd, "8051 host command %u timeout\n", type);
4886                         if (out_data)
4887                                 *out_data = 0;
4888                         return_code = -ETIMEDOUT;
4889                         goto fail;
4890                 }
4891                 udelay(2);
4892         }
4893
4894         if (out_data) {
4895                 *out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT)
4896                                 & DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK;
4897                 if (type == HCMD_READ_LCB_CSR) {
4898                         /* top 16 bits are in a different register */
4899                         *out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1)
4900                                 & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK)
4901                                 << (48
4902                                     - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT);
4903                 }
4904         }
4905         return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT)
4906                                 & DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK;
4907         dd->dc8051_timed_out = 0;
4908         /*
4909          * Clear command for next user.
4910          */
4911         write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
4912
4913 fail:
4914         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4915
4916         return return_code;
4917 }
4918
4919 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
4920 {
4921         return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
4922 }
4923
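/*
 * Write a 32-bit value into an 8051 configuration "register" selected
 * by field id and lane id.  Returns the 8051 command return code
 * (HCMD_SUCCESS on success), or a negative errno if the command could
 * not be issued.
 */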
4924 static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
4925                             u8 lane_id, u32 config_data)
4926 {
4927         u64 data;
4928         int ret;
4929
4930         data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
4931                 | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
4932                 | (u64)config_data << LOAD_DATA_DATA_SHIFT;
4933         ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
4934         if (ret != HCMD_SUCCESS) {
4935                 dd_dev_err(dd,
4936                         "load 8051 config: field id %d, lane %d, err %d\n",
4937                         (int)field_id, (int)lane_id, ret);
4938         }
4939         return ret;
4940 }
4941
4942 /*
4943  * Read the 8051 firmware "registers".  Use the RAM directly.  Always
4944  * set the result, even on error.
4945  * Return 0 on success, -errno on failure
4946  */
4947 static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
4948                             u32 *result)
4949 {
4950         u64 big_data;
4951         u32 addr;
4952         int ret;
4953
4954         /* address start depends on the lane_id */
4955         if (lane_id < 4)
4956                 addr = (4 * NUM_GENERAL_FIELDS)
4957                         + (lane_id * 4 * NUM_LANE_FIELDS);
4958         else
4959                 addr = 0;
4960         addr += field_id * 4;
4961
4962         /* read is in 8-byte chunks, hardware will truncate the address down */
4963         ret = read_8051_data(dd, addr, 8, &big_data);
4964
4965         if (ret == 0) {
4966                 /* extract the 4 bytes we want */
4967                 if (addr & 0x4)
4968                         *result = (u32)(big_data >> 32);
4969                 else
4970                         *result = (u32)big_data;
4971         } else {
4972                 *result = 0;
4973                 dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
4974                         __func__, lane_id, field_id);
4975         }
4976
4977         return ret;
4978 }
4979
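/*
 * Write the local verify capability PHY frame: power management and
 * continuous remote update support.
 */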
4980 static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management,
4981                               u8 continuous)
4982 {
4983         u32 frame;
4984
4985         frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT
4986                 | power_management << POWER_MANAGEMENT_SHIFT;
4987         return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY,
4988                                 GENERAL_CONFIG, frame);
4989 }
4990
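/*
 * Write the local verify capability fabric frame: vAU, Z, vCU, VL15
 * credits, and supported CRC sizes.
 */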
4991 static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
4992                                  u16 vl15buf, u8 crc_sizes)
4993 {
4994         u32 frame;
4995
4996         frame = (u32)vau << VAU_SHIFT
4997                 | (u32)z << Z_SHIFT
4998                 | (u32)vcu << VCU_SHIFT
4999                 | (u32)vl15buf << VL15BUF_SHIFT
5000                 | (u32)crc_sizes << CRC_SIZES_SHIFT;
5001         return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC,
5002                                 GENERAL_CONFIG, frame);
5003 }
5004
5005 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
5006                                      u8 *flag_bits, u16 *link_widths)
5007 {
5008         u32 frame;
5009
5010         read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5011                                 &frame);
5012         *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
5013         *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
5014         *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5015 }
5016
5017 static int write_vc_local_link_width(struct hfi1_devdata *dd,
5018                                      u8 misc_bits,
5019                                      u8 flag_bits,
5020                                      u16 link_widths)
5021 {
5022         u32 frame;
5023
5024         frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
5025                 | (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
5026                 | (u32)link_widths << LINK_WIDTH_SHIFT;
5027         return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5028                      frame);
5029 }
5030
5031 static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id,
5032                                  u8 device_rev)
5033 {
5034         u32 frame;
5035
5036         frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT)
5037                 | ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT);
5038         return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame);
5039 }
5040
5041 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
5042                                   u8 *device_rev)
5043 {
5044         u32 frame;
5045
5046         read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame);
5047         *device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK;
5048         *device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT)
5049                         & REMOTE_DEVICE_REV_MASK;
5050 }
5051
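/* Read the firmware version fields (A and B) from the MISC_STATUS frame. */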
5052 void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
5053 {
5054         u32 frame;
5055
5056         read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
5057         *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
5058         *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
5059 }
5060
5061 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
5062                                u8 *continuous)
5063 {
5064         u32 frame;
5065
5066         read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame);
5067         *power_management = (frame >> POWER_MANAGEMENT_SHIFT)
5068                                         & POWER_MANAGEMENT_MASK;
5069         *continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT)
5070                                         & CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK;
5071 }
5072
5073 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
5074                                   u8 *vcu, u16 *vl15buf, u8 *crc_sizes)
5075 {
5076         u32 frame;
5077
5078         read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame);
5079         *vau = (frame >> VAU_SHIFT) & VAU_MASK;
5080         *z = (frame >> Z_SHIFT) & Z_MASK;
5081         *vcu = (frame >> VCU_SHIFT) & VCU_MASK;
5082         *vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK;
5083         *crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK;
5084 }
5085
5086 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
5087                                       u8 *remote_tx_rate,
5088                                       u16 *link_widths)
5089 {
5090         u32 frame;
5091
5092         read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
5093                                 &frame);
5094         *remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
5095                                 & REMOTE_TX_RATE_MASK;
5096         *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5097 }
5098
5099 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
5100 {
5101         u32 frame;
5102
5103         read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame);
5104         *enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
5105 }
5106
5107 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
5108 {
5109         u32 frame;
5110
5111         read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
5112         *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
5113 }
5114
5115 static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
5116 {
5117         read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
5118 }
5119
5120 static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs)
5121 {
5122         read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs);
5123 }
5124
5125 void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
5126 {
5127         u32 frame;
5128         int ret;
5129
5130         *link_quality = 0;
5131         if (dd->pport->host_link_state & HLS_UP) {
5132                 ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
5133                                         &frame);
5134                 if (ret == 0)
5135                         *link_quality = (frame >> LINK_QUALITY_SHIFT)
5136                                                 & LINK_QUALITY_MASK;
5137         }
5138 }
5139
5140 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
5141 {
5142         u32 frame;
5143
5144         read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame);
5145         *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
5146 }
5147
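/*
 * Read the TX settings frame and unpack the lane enable, polarity
 * inversion, and maximum rate fields.
 */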
5148 static int read_tx_settings(struct hfi1_devdata *dd,
5149                             u8 *enable_lane_tx,
5150                             u8 *tx_polarity_inversion,
5151                             u8 *rx_polarity_inversion,
5152                             u8 *max_rate)
5153 {
5154         u32 frame;
5155         int ret;
5156
5157         ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame);
5158         *enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT)
5159                                 & ENABLE_LANE_TX_MASK;
5160         *tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT)
5161                                 & TX_POLARITY_INVERSION_MASK;
5162         *rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT)
5163                                 & RX_POLARITY_INVERSION_MASK;
5164         *max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK;
5165         return ret;
5166 }
5167
5168 static int write_tx_settings(struct hfi1_devdata *dd,
5169                              u8 enable_lane_tx,
5170                              u8 tx_polarity_inversion,
5171                              u8 rx_polarity_inversion,
5172                              u8 max_rate)
5173 {
5174         u32 frame;
5175
5176         /* no need to mask, all variable sizes match field widths */
5177         frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT
5178                 | tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT
5179                 | rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT
5180                 | max_rate << MAX_RATE_SHIFT;
5181         return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
5182 }
5183
5184 static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
5185 {
5186         u32 frame, version, prod_id;
5187         int ret, lane;
5188
5189         /* 4 lanes */
5190         for (lane = 0; lane < 4; lane++) {
5191                 ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
5192                 if (ret) {
5193                         dd_dev_err(
5194                                 dd,
5195                                 "Unable to read lane %d firmware details\n",
5196                                 lane);
5197                         continue;
5198                 }
5199                 version = (frame >> SPICO_ROM_VERSION_SHIFT)
5200                                         & SPICO_ROM_VERSION_MASK;
5201                 prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
5202                                         & SPICO_ROM_PROD_ID_MASK;
5203                 dd_dev_info(dd,
5204                         "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
5205                         lane, version, prod_id);
5206         }
5207 }
5208
5209 /*
5210  * Read an idle LCB message.
5211  *
5212  * Returns 0 on success, -EINVAL on error
5213  */
5214 static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
5215 {
5216         int ret;
5217
5218         ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
5219                 type, data_out);
5220         if (ret != HCMD_SUCCESS) {
5221                 dd_dev_err(dd, "read idle message: type %d, err %d\n",
5222                         (u32)type, ret);
5223                 return -EINVAL;
5224         }
5225         dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
5226         /* return only the payload as we already know the type */
5227         *data_out >>= IDLE_PAYLOAD_SHIFT;
5228         return 0;
5229 }
5230
5231 /*
5232  * Read an idle SMA message.  To be done in response to a notification from
5233  * the 8051.
5234  *
5235  * Returns 0 on success, -EINVAL on error
5236  */
5237 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
5238 {
5239         return read_idle_message(dd,
5240                         (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
5241 }
5242
5243 /*
5244  * Send an idle LCB message.
5245  *
5246  * Returns 0 on success, -EINVAL on error
5247  */
5248 static int send_idle_message(struct hfi1_devdata *dd, u64 data)
5249 {
5250         int ret;
5251
5252         dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data);
5253         ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
5254         if (ret != HCMD_SUCCESS) {
5255                 dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
5256                         data, ret);
5257                 return -EINVAL;
5258         }
5259         return 0;
5260 }
5261
5262 /*
5263  * Send an idle SMA message.
5264  *
5265  * Returns 0 on success, -EINVAL on error
5266  */
5267 int send_idle_sma(struct hfi1_devdata *dd, u64 message)
5268 {
5269         u64 data;
5270
5271         data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
5272                 | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
5273         return send_idle_message(dd, data);
5274 }
5275
5276 /*
5277  * Initialize the LCB then do a quick link up.  This may or may not be
5278  * in loopback.
5279  *
5280  * return 0 on success, -errno on error
5281  */
5282 static int do_quick_linkup(struct hfi1_devdata *dd)
5283 {
5284         u64 reg;
5285         unsigned long timeout;
5286         int ret;
5287
5288         lcb_shutdown(dd, 0);
5289
5290         if (loopback) {
5291                 /* LCB_CFG_LOOPBACK.VAL = 2 */
5292                 /* LCB_CFG_LANE_WIDTH.VAL = 0 */
5293                 write_csr(dd, DC_LCB_CFG_LOOPBACK,
5294                         IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
5295                 write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
5296         }
5297
5298         /* start the LCBs */
5299         /* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
5300         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
5301
5302         /* simulator only loopback steps */
5303         if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5304                 /* LCB_CFG_RUN.EN = 1 */
5305                 write_csr(dd, DC_LCB_CFG_RUN,
5306                         1ull << DC_LCB_CFG_RUN_EN_SHIFT);
5307
5308                 /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
5309                 timeout = jiffies + msecs_to_jiffies(10);
5310                 while (1) {
5311                         reg = read_csr(dd,
5312                                 DC_LCB_STS_LINK_TRANSFER_ACTIVE);
5313                         if (reg)
5314                                 break;
5315                         if (time_after(jiffies, timeout)) {
5316                                 dd_dev_err(dd,
5317                                         "timeout waiting for LINK_TRANSFER_ACTIVE\n");
5318                                 return -ETIMEDOUT;
5319                         }
5320                         udelay(2);
5321                 }
5322
5323                 write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
5324                         1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
5325         }
5326
5327         if (!loopback) {
5328                 /*
5329                  * When doing quick linkup and not in loopback, both
5330                  * sides must be done with LCB set-up before either
5331                  * starts the quick linkup.  Put a delay here so that
5332                  * both sides can be started and have a chance to be
5333                  * done with LCB set up before resuming.
5334                  */
5335                 dd_dev_err(dd,
5336                         "Pausing for peer to be finished with LCB set up\n");
5337                 msleep(5000);
5338                 dd_dev_err(dd,
5339                         "Continuing with quick linkup\n");
5340         }
5341
5342         write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
5343         set_8051_lcb_access(dd);
5344
5345         /*
5346          * A "quick" LinkUp request sets the physical link state to
5347          * LinkUp without a verify capability sequence.
5348          * This state is available in simulator v37 and later.
5349          */
5350         ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
5351         if (ret != HCMD_SUCCESS) {
5352                 dd_dev_err(dd,
5353                         "%s: set physical link state to quick LinkUp failed with return %d\n",
5354                         __func__, ret);
5355
5356                 set_host_lcb_access(dd);
5357                 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
5358
5359                 if (ret >= 0)
5360                         ret = -EINVAL;
5361                 return ret;
5362         }
5363
5364         return 0; /* success */
5365 }
5366
5367 /*
5368  * Set the SerDes to internal loopback mode.
5369  * Returns 0 on success, -errno on error.
5370  */
5371 static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
5372 {
5373         int ret;
5374
5375         ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
5376         if (ret == HCMD_SUCCESS)
5377                 return 0;
5378         dd_dev_err(dd,
5379                 "Set physical link state to SerDes Loopback failed with return %d\n",
5380                 ret);
5381         if (ret >= 0)
5382                 ret = -EINVAL;
5383         return ret;
5384 }
5385
5386 /*
5387  * Do all special steps to set up loopback.
5388  */
5389 static int init_loopback(struct hfi1_devdata *dd)
5390 {
5391         dd_dev_info(dd, "Entering loopback mode\n");
5392
5393         /* all loopbacks should disable self GUID check */
5394         write_csr(dd, DC_DC8051_CFG_MODE,
5395                 (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
5396
5397         /*
5398          * The simulator has only one loopback option - LCB.  Switch
5399          * to that option, which includes quick link up.
5400          *
5401          * Accept all valid loopback values.
5402          */
5403         if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5404                 && (loopback == LOOPBACK_SERDES
5405                         || loopback == LOOPBACK_LCB
5406                         || loopback == LOOPBACK_CABLE)) {
5407                 loopback = LOOPBACK_LCB;
5408                 quick_linkup = 1;
5409                 return 0;
5410         }
5411
5412         /* handle serdes loopback */
5413         if (loopback == LOOPBACK_SERDES) {
5414                 /* internal serdes loopback needs quick linkup on RTL */
5415                 if (dd->icode == ICODE_RTL_SILICON)
5416                         quick_linkup = 1;
5417                 return set_serdes_loopback_mode(dd);
5418         }
5419
5420         /* LCB loopback - handled at poll time */
5421         if (loopback == LOOPBACK_LCB) {
5422                 quick_linkup = 1; /* LCB is always quick linkup */
5423
5424                 /* not supported in emulation due to emulation RTL changes */
5425                 if (dd->icode == ICODE_FPGA_EMULATION) {
5426                         dd_dev_err(dd,
5427                                 "LCB loopback not supported in emulation\n");
5428                         return -EINVAL;
5429                 }
5430                 return 0;
5431         }
5432
5433         /* external cable loopback requires no extra steps */
5434         if (loopback == LOOPBACK_CABLE)
5435                 return 0;
5436
5437         dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
5438         return -EINVAL;
5439 }
5440
5441 /*
5442  * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
5443  * used in the Verify Capability link width attribute.
5444  */
5445 static u16 opa_to_vc_link_widths(u16 opa_widths)
5446 {
5447         int i;
5448         u16 result = 0;
5449
5450         static const struct link_bits {
5451                 u16 from;
5452                 u16 to;
5453         } opa_link_xlate[] = {
5454                 { OPA_LINK_WIDTH_1X, 1 << (1-1)  },
5455                 { OPA_LINK_WIDTH_2X, 1 << (2-1)  },
5456                 { OPA_LINK_WIDTH_3X, 1 << (3-1)  },
5457                 { OPA_LINK_WIDTH_4X, 1 << (4-1)  },
5458         };
5459
5460         for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
5461                 if (opa_widths & opa_link_xlate[i].from)
5462                         result |= opa_link_xlate[i].to;
5463         }
5464         return result;
5465 }
5466
5467 /*
5468  * Set link attributes before moving to polling.
5469  */
5470 static int set_local_link_attributes(struct hfi1_pportdata *ppd)
5471 {
5472         struct hfi1_devdata *dd = ppd->dd;
5473         u8 enable_lane_tx;
5474         u8 tx_polarity_inversion;
5475         u8 rx_polarity_inversion;
5476         int ret;
5477
5478         /* reset our fabric serdes to clear any lingering problems */
5479         fabric_serdes_reset(dd);
5480
5481         /* set the local tx rate - need to read-modify-write */
5482         ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
5483                 &rx_polarity_inversion, &ppd->local_tx_rate);
5484         if (ret)
5485                 goto set_local_link_attributes_fail;
5486
5487         if (dd->dc8051_ver < dc8051_ver(0, 20)) {
5488                 /* set the tx rate to the fastest enabled */
5489                 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5490                         ppd->local_tx_rate = 1;
5491                 else
5492                         ppd->local_tx_rate = 0;
5493         } else {
5494                 /* set the tx rate to all enabled */
5495                 ppd->local_tx_rate = 0;
5496                 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5497                         ppd->local_tx_rate |= 2;
5498                 if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
5499                         ppd->local_tx_rate |= 1;
5500         }
5501
5502         enable_lane_tx = 0xF; /* enable all four lanes */
5503         ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
5504                      rx_polarity_inversion, ppd->local_tx_rate);
5505         if (ret != HCMD_SUCCESS)
5506                 goto set_local_link_attributes_fail;
5507
5508         /*
5509          * DC supports continuous updates.
5510          */
5511         ret = write_vc_local_phy(dd, 0 /* no power management */,
5512                                      1 /* continuous updates */);
5513         if (ret != HCMD_SUCCESS)
5514                 goto set_local_link_attributes_fail;
5515
5516         /* z=1 in the next call: AU of 0 is not supported by the hardware */
5517         ret = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
5518                                     ppd->port_crc_mode_enabled);
5519         if (ret != HCMD_SUCCESS)
5520                 goto set_local_link_attributes_fail;
5521
5522         ret = write_vc_local_link_width(dd, 0, 0,
5523                      opa_to_vc_link_widths(ppd->link_width_enabled));
5524         if (ret != HCMD_SUCCESS)
5525                 goto set_local_link_attributes_fail;
5526
5527         /* let peer know who we are */
5528         ret = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
5529         if (ret == HCMD_SUCCESS)
5530                 return 0;
5531
5532 set_local_link_attributes_fail:
5533         dd_dev_err(dd,
5534                 "Failed to set local link attributes, return 0x%x\n",
5535                 ret);
5536         return ret;
5537 }
5538
5539 /*
5540  * Call this to start the link.  Schedule a retry if the cable is not
5541  * present or if unable to start polling.  Do not do anything if the
5542  * link is disabled.  Returns 0 if the link is disabled or moved to polling.
5543  */
5544 int start_link(struct hfi1_pportdata *ppd)
5545 {
5546         if (!ppd->link_enabled) {
5547                 dd_dev_info(ppd->dd,
5548                         "%s: stopping link start because link is disabled\n",
5549                         __func__);
5550                 return 0;
5551         }
5552         if (!ppd->driver_link_ready) {
5553                 dd_dev_info(ppd->dd,
5554                         "%s: stopping link start because driver is not ready\n",
5555                         __func__);
5556                 return 0;
5557         }
5558
5559         if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
5560                         loopback == LOOPBACK_LCB ||
5561                         ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5562                 return set_link_state(ppd, HLS_DN_POLL);
5563
5564         dd_dev_info(ppd->dd,
5565                 "%s: stopping link start because no cable is present\n",
5566                 __func__);
5567         return -EAGAIN;
5568 }
5569
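/*
 * Reset the QSFP module on this port: enable the RESET_N output, drive
 * it low, wait ~10us, then release it, using the QSFP output-enable and
 * output CSRs for this HFI.
 */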
5570 static void reset_qsfp(struct hfi1_pportdata *ppd)
5571 {
5572         struct hfi1_devdata *dd = ppd->dd;
5573         u64 mask, qsfp_mask;
5574
5575         mask = (u64)QSFP_HFI0_RESET_N;
5576         qsfp_mask = read_csr(dd,
5577                 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
5578         qsfp_mask |= mask;
5579         write_csr(dd,
5580                 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
5581                 qsfp_mask);
5582
5583         qsfp_mask = read_csr(dd,
5584                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
5585         qsfp_mask &= ~mask;
5586         write_csr(dd,
5587                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5588                 qsfp_mask);
5589
5590         udelay(10);
5591
5592         qsfp_mask |= mask;
5593         write_csr(dd,
5594                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5595                 qsfp_mask);
5596 }
5597
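/*
 * Decode the QSFP alarm/warning status bytes and log any active
 * temperature, supply voltage, RX power, TX bias, or TX power
 * conditions.  Vendor-specific and reserved bytes are skipped.
 */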
5598 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
5599                                         u8 *qsfp_interrupt_status)
5600 {
5601         struct hfi1_devdata *dd = ppd->dd;
5602
5603         if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
5604                 (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
5605                 dd_dev_info(dd,
5606                         "%s: QSFP cable temperature too high\n",
5607                         __func__);
5608
5609         if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
5610                 (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
5611                 dd_dev_info(dd,
5612                         "%s: QSFP cable temperature too low\n",
5613                         __func__);
5614
5615         if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
5616                 (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
5617                 dd_dev_info(dd,
5618                         "%s: QSFP supply voltage too high\n",
5619                         __func__);
5620
5621         if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
5622                 (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
5623                 dd_dev_info(dd,
5624                         "%s: QSFP supply voltage too low\n",
5625                         __func__);
5626
5627         /* Byte 2 is vendor specific */
5628
5629         if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
5630                 (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
5631                 dd_dev_info(dd,
5632                         "%s: Cable RX channel 1/2 power too high\n",
5633                         __func__);
5634
5635         if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
5636                 (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
5637                 dd_dev_info(dd,
5638                         "%s: Cable RX channel 1/2 power too low\n",
5639                         __func__);
5640
5641         if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
5642                 (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
5643                 dd_dev_info(dd,
5644                         "%s: Cable RX channel 3/4 power too high\n",
5645                         __func__);
5646
5647         if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
5648                 (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
5649                 dd_dev_info(dd,
5650                         "%s: Cable RX channel 3/4 power too low\n",
5651                         __func__);
5652
5653         if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
5654                 (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
5655                 dd_dev_info(dd,
5656                         "%s: Cable TX channel 1/2 bias too high\n",
5657                         __func__);
5658
5659         if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
5660                 (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
5661                 dd_dev_info(dd,
5662                         "%s: Cable TX channel 1/2 bias too low\n",
5663                         __func__);
5664
5665         if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
5666                 (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
5667                 dd_dev_info(dd,
5668                         "%s: Cable TX channel 3/4 bias too high\n",
5669                         __func__);
5670
5671         if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
5672                 (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
5673                 dd_dev_info(dd,
5674                         "%s: Cable TX channel 3/4 bias too low\n",
5675                         __func__);
5676
5677         if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
5678                 (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
5679                 dd_dev_info(dd,
5680                         "%s: Cable TX channel 1/2 power too high\n",
5681                         __func__);
5682
5683         if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
5684                 (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
5685                 dd_dev_info(dd,
5686                         "%s: Cable TX channel 1/2 power too low\n",
5687                         __func__);
5688
5689         if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
5690                 (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
5691                 dd_dev_info(dd,
5692                         "%s: Cable TX channel 3/4 power too high\n",
5693                         __func__);
5694
5695         if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
5696                 (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
5697                 dd_dev_info(dd,
5698                         "%s: Cable TX channel 3/4 power too low\n",
5699                         __func__);
5700
5701         /* Bytes 9-10 and 11-12 are reserved */
5702         /* Bytes 13-15 are vendor specific */
5703
5704         return 0;
5705 }
5706
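/*
 * Host steps to take before LNI: currently just a refresh of the
 * cached QSFP data for this port.
 */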
5707 static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
5708 {
5709         refresh_qsfp_cache(ppd, &ppd->qsfp_info);
5710
5711         return 0;
5712 }
5713
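/*
 * Fallback for a non-functional QSFP INT_N pin: read the module status
 * byte directly and, if the module data is ready, run the pre-LNI host
 * behaviors.  Returns 0 on success, -EIO if the status read fails.
 */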
5714 static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
5715 {
5716         struct hfi1_devdata *dd = ppd->dd;
5717         u8 qsfp_interrupt_status = 0;
5718
5719         if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
5720                 != 1) {
5721                 dd_dev_info(dd,
5722                         "%s: Failed to read status of QSFP module\n",
5723                         __func__);
5724                 return -EIO;
5725         }
5726
5727         /* We don't care about alarms & warnings with a non-functional INT_N */
5728         if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
5729                 do_pre_lni_host_behaviors(ppd);
5730
5731         return 0;
5732 }
5733
5734 /* This routine will only be scheduled if the QSFP module is present */
5735 static void qsfp_event(struct work_struct *work)
5736 {
5737         struct qsfp_data *qd;
5738         struct hfi1_pportdata *ppd;
5739         struct hfi1_devdata *dd;
5740
5741         qd = container_of(work, struct qsfp_data, qsfp_work);
5742         ppd = qd->ppd;
5743         dd = ppd->dd;
5744
5745         /* Sanity check */
5746         if (!qsfp_mod_present(ppd))
5747                 return;
5748
5749         /*
5750          * Turn the DC back on after the cable has been
5751          * re-inserted. Up until now, the DC has been in
5752          * reset to save power.
5753          */
5754         dc_start(dd);
5755
5756         if (qd->cache_refresh_required) {
5757                 msleep(3000);
5758                 reset_qsfp(ppd);
5759
5760                 /* Check for QSFP interrupt after t_init (SFF 8679)
5761                  * + extra
5762                  */
5763                 msleep(3000);
5764                 if (!qd->qsfp_interrupt_functional) {
5765                         if (do_qsfp_intr_fallback(ppd) < 0)
5766                                 dd_dev_info(dd, "%s: QSFP fallback failed\n",
5767                                         __func__);
5768                         ppd->driver_link_ready = 1;
5769                         start_link(ppd);
5770                 }
5771         }
5772
5773         if (qd->check_interrupt_flags) {
5774                 u8 qsfp_interrupt_status[16] = {0,};
5775
5776                 if (qsfp_read(ppd, dd->hfi1_id, 6,
5777                               &qsfp_interrupt_status[0], 16) != 16) {
5778                         dd_dev_info(dd,
5779                                 "%s: Failed to read status of QSFP module\n",
5780                                 __func__);
5781                 } else {
5782                         unsigned long flags;
5783                         u8 data_status;
5784
5785                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
5786                         ppd->qsfp_info.check_interrupt_flags = 0;
5787                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
5788                                                                 flags);
5789
5790                         if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
5791                                  != 1) {
5792                                 dd_dev_info(dd,
5793                                 "%s: Failed to read status of QSFP module\n",
5794                                         __func__);
5795                         }
5796                         if (!(data_status & QSFP_DATA_NOT_READY)) {
5797                                 do_pre_lni_host_behaviors(ppd);
5798                                 start_link(ppd);
5799                         } else
5800                                 handle_qsfp_error_conditions(ppd,
5801                                                 qsfp_interrupt_status);
5802                 }
5803         }
5804 }
5805
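/*
 * One-time QSFP setup for a port: initialize the qsfp_event worker,
 * clear and configure the QSFP interrupt CSRs, and, if a module is
 * present, reset it and mark the driver link ready.  Loopback and
 * simulator configurations simply mark the driver link ready and skip
 * QSFP handling.
 */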
5806 void init_qsfp(struct hfi1_pportdata *ppd)
5807 {
5808         struct hfi1_devdata *dd = ppd->dd;
5809         u64 qsfp_mask;
5810
5811         if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
5812                         ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5813                 ppd->driver_link_ready = 1;
5814                 return;
5815         }
5816
5817         ppd->qsfp_info.ppd = ppd;
5818         INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
5819
5820         qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
5821         /* Clear current status to avoid spurious interrupts */
5822         write_csr(dd,
5823                         dd->hfi1_id ?
5824                                 ASIC_QSFP2_CLEAR :
5825                                 ASIC_QSFP1_CLEAR,
5826                 qsfp_mask);
5827
5828         /* Handle active low nature of INT_N and MODPRST_N pins */
5829         if (qsfp_mod_present(ppd))
5830                 qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N;
5831         write_csr(dd,
5832                   dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
5833                   qsfp_mask);
5834
5835         /* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
5836         qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
5837         write_csr(dd,
5838                 dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
5839                 qsfp_mask);
5840
5841         if (qsfp_mod_present(ppd)) {
5842                 msleep(3000);
5843                 reset_qsfp(ppd);
5844
5845                 /* Check for QSFP interrupt after t_init (SFF 8679)
5846                  * + extra
5847                  */
5848                 msleep(3000);
5849                 if (!ppd->qsfp_info.qsfp_interrupt_functional) {
5850                         if (do_qsfp_intr_fallback(ppd) < 0)
5851                                 dd_dev_info(dd,
5852                                         "%s: QSFP fallback failed\n",
5853                                         __func__);
5854                         ppd->driver_link_ready = 1;
5855                 }
5856         }
5857 }
5858
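/*
 * Bring up the link on this port: enable extended PSNs if configured,
 * derive the port GUID if needed, mark the link enabled, set up any
 * requested loopback, and start the link.
 */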
5859 int bringup_serdes(struct hfi1_pportdata *ppd)
5860 {
5861         struct hfi1_devdata *dd = ppd->dd;
5862         u64 guid;
5863         int ret;
5864
5865         if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
5866                 add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
5867
5868         guid = ppd->guid;
5869         if (!guid) {
5870                 if (dd->base_guid)
5871                         guid = dd->base_guid + ppd->port - 1;
5872                 ppd->guid = guid;
5873         }
5874
5875         /* the link defaults to enabled */
5876         ppd->link_enabled = 1;
5877         /* Set linkinit_reason on power up per OPA spec */
5878         ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
5879
5880         if (loopback) {
5881                 ret = init_loopback(dd);
5882                 if (ret < 0)
5883                         return ret;
5884         }
5885
5886         return start_link(ppd);
5887 }
5888
5889 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
5890 {
5891         struct hfi1_devdata *dd = ppd->dd;
5892
5893         /*
5894          * Shut down the link and keep it down.  First indicate that the
5895          * driver no longer wants the link to be up (driver_link_ready).
5896          * Then make sure the link is not automatically restarted
5897          * (link_enabled).  Cancel any pending restart.  Finally,
5898          * go offline.
5899          */
5900         ppd->driver_link_ready = 0;
5901         ppd->link_enabled = 0;
5902
5903         set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
5904           OPA_LINKDOWN_REASON_SMA_DISABLED);
5905         set_link_state(ppd, HLS_DN_OFFLINE);
5906
5907         /* disable the port */
5908         clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
5909         cancel_work_sync(&ppd->freeze_work);
5910 }
5911
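/*
 * Allocate the per-CPU rc_acks, rc_qacks, and rc_delayed_comp counters
 * for each port.  Returns 0 on success, -ENOMEM on allocation failure.
 */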
5912 static inline int init_cpu_counters(struct hfi1_devdata *dd)
5913 {
5914         struct hfi1_pportdata *ppd;
5915         int i;
5916
5917         ppd = (struct hfi1_pportdata *)(dd + 1);
5918         for (i = 0; i < dd->num_pports; i++, ppd++) {
5919                 ppd->ibport_data.rc_acks = NULL;
5920                 ppd->ibport_data.rc_qacks = NULL;
5921                 ppd->ibport_data.rc_acks = alloc_percpu(u64);
5922                 ppd->ibport_data.rc_qacks = alloc_percpu(u64);
5923                 ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
5924                 if ((ppd->ibport_data.rc_acks == NULL) ||
5925                     (ppd->ibport_data.rc_delayed_comp == NULL) ||
5926                     (ppd->ibport_data.rc_qacks == NULL))
5927                         return -ENOMEM;
5928         }
5929
5930         return 0;
5931 }
5932
5933 static const char * const pt_names[] = {
5934         "expected",
5935         "eager",
5936         "invalid"
5937 };
5938
5939 static const char *pt_name(u32 type)
5940 {
5941         return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
5942 }
5943
5944 /*
5945  * index is the index into the receive array
5946  */
5947 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
5948                   u32 type, unsigned long pa, u16 order)
5949 {
5950         u64 reg;
5951         void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
5952                               (dd->kregbase + RCV_ARRAY));
5953
5954         if (!(dd->flags & HFI1_PRESENT))
5955                 goto done;
5956
5957         if (type == PT_INVALID) {
5958                 pa = 0;
5959         } else if (type > PT_INVALID) {
5960                 dd_dev_err(dd,
5961                         "unexpected receive array type %u for index %u, not handled\n",
5962                         type, index);
5963                 goto done;
5964         }
5965
5966         hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
5967                   pt_name(type), index, pa, (unsigned long)order);
5968
5969 #define RT_ADDR_SHIFT 12        /* 4KB kernel address boundary */
5970         reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
5971                 | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
5972                 | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
5973                                         << RCV_ARRAY_RT_ADDR_SHIFT;
5974         writeq(reg, base + (index * 8));
5975
5976         if (type == PT_EAGER)
5977                 /*
5978                  * Eager entries are written one-by-one so we have to push them
5979                  * after we write the entry.
5980                  */
5981                 flush_wc();
5982 done:
5983         return;
5984 }
5985
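/* Invalidate all eager and expected receive array entries for a context. */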
5986 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
5987 {
5988         struct hfi1_devdata *dd = rcd->dd;
5989         u32 i;
5990
5991         /* this could be optimized */
5992         for (i = rcd->eager_base; i < rcd->eager_base +
5993                      rcd->egrbufs.alloced; i++)
5994                 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5995
5996         for (i = rcd->expected_base;
5997                         i < rcd->expected_base + rcd->expected_count; i++)
5998                 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5999 }
6000
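/* Fill in the runtime capability flags reported for this context. */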
6001 int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
6002                         struct hfi1_ctxt_info *kinfo)
6003 {
6004         kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
6005                 HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
6006         return 0;
6007 }
6008
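/*
 * Return a pointer to the packet header that corresponds to the given
 * RHF entry in the receive header queue.
 */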
6009 struct hfi1_message_header *hfi1_get_msgheader(
6010                                 struct hfi1_devdata *dd, __le32 *rhf_addr)
6011 {
6012         u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
6013
6014         return (struct hfi1_message_header *)
6015                 (rhf_addr - dd->rhf_offset + offset);
6016 }
6017
6018 static const char * const ib_cfg_name_strings[] = {
6019         "HFI1_IB_CFG_LIDLMC",
6020         "HFI1_IB_CFG_LWID_DG_ENB",
6021         "HFI1_IB_CFG_LWID_ENB",
6022         "HFI1_IB_CFG_LWID",
6023         "HFI1_IB_CFG_SPD_ENB",
6024         "HFI1_IB_CFG_SPD",
6025         "HFI1_IB_CFG_RXPOL_ENB",
6026         "HFI1_IB_CFG_LREV_ENB",
6027         "HFI1_IB_CFG_LINKLATENCY",
6028         "HFI1_IB_CFG_HRTBT",
6029         "HFI1_IB_CFG_OP_VLS",
6030         "HFI1_IB_CFG_VL_HIGH_CAP",
6031         "HFI1_IB_CFG_VL_LOW_CAP",
6032         "HFI1_IB_CFG_OVERRUN_THRESH",
6033         "HFI1_IB_CFG_PHYERR_THRESH",
6034         "HFI1_IB_CFG_LINKDEFAULT",
6035         "HFI1_IB_CFG_PKEYS",
6036         "HFI1_IB_CFG_MTU",
6037         "HFI1_IB_CFG_LSTATE",
6038         "HFI1_IB_CFG_VL_HIGH_LIMIT",
6039         "HFI1_IB_CFG_PMA_TICKS",
6040         "HFI1_IB_CFG_PORT"
6041 };
6042
6043 static const char *ib_cfg_name(int which)
6044 {
6045         if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
6046                 return "invalid";
6047         return ib_cfg_name_strings[which];
6048 }
6049
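/*
 * Return the current value of the given HFI1_IB_CFG_* item for this
 * port.  Unimplemented items are logged (if enabled) and return 0.
 */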
6050 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
6051 {
6052         struct hfi1_devdata *dd = ppd->dd;
6053         int val = 0;
6054
6055         switch (which) {
6056         case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
6057                 val = ppd->link_width_enabled;
6058                 break;
6059         case HFI1_IB_CFG_LWID: /* currently active Link-width */
6060                 val = ppd->link_width_active;
6061                 break;
6062         case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6063                 val = ppd->link_speed_enabled;
6064                 break;
6065         case HFI1_IB_CFG_SPD: /* current Link speed */
6066                 val = ppd->link_speed_active;
6067                 break;
6068
6069         case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
6070         case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
6071         case HFI1_IB_CFG_LINKLATENCY:
6072                 goto unimplemented;
6073
6074         case HFI1_IB_CFG_OP_VLS:
6075                 val = ppd->vls_operational;
6076                 break;
6077         case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
6078                 val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
6079                 break;
6080         case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
6081                 val = VL_ARB_LOW_PRIO_TABLE_SIZE;
6082                 break;
6083         case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6084                 val = ppd->overrun_threshold;
6085                 break;
6086         case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6087                 val = ppd->phy_error_threshold;
6088                 break;
6089         case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6090                 val = dd->link_default;
6091                 break;
6092
6093         case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
6094         case HFI1_IB_CFG_PMA_TICKS:
6095         default:
6096 unimplemented:
6097                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6098                         dd_dev_info(
6099                                 dd,
6100                                 "%s: which %s: not implemented\n",
6101                                 __func__,
6102                                 ib_cfg_name(which));
6103                 break;
6104         }
6105
6106         return val;
6107 }
6108
6109 /*
6110  * The largest MAD packet size.
6111  */
6112 #define MAX_MAD_PACKET 2048
6113
6114 /*
6115  * Return the maximum header bytes that can go on the _wire_
6116  * for this device. This count includes the ICRC which is
6117  * not part of the packet held in memory but is appended
6118  * by the HW.
6119  * This is dependent on the device's receive header entry size.
6120  * HFI allows this to be set per-receive context, but the
6121  * driver presently enforces a global value.
6122  */
6123 u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
6124 {
6125         /*
6126          * The maximum non-payload (MTU) bytes in LRH.PktLen are
6127          * the Receive Header Entry Size minus the PBC (or RHF) size
6128          * plus one DW for the ICRC appended by HW.
6129          *
6130          * dd->rcd[0].rcvhdrqentsize is in DW.
6131          * We use rcd[0] as all contexts will have the same value. Also,
6132          * the first kernel context would have been allocated by now so
6133          * we are guaranteed a valid value.
6134          */
6135         return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
6136 }
6137
6138 /*
6139  * Set Send Length
6140  * @ppd - per port data
6141  *
6142  * Set the MTU by limiting how many DWs may be sent.  The SendLenCheck*
6143  * registers compare against LRH.PktLen, so use the max bytes included
6144  * in the LRH.
6145  *
6146  * This routine changes all VL values except VL15, which it maintains at
6147  * the same value.
6148  */
6149 static void set_send_length(struct hfi1_pportdata *ppd)
6150 {
6151         struct hfi1_devdata *dd = ppd->dd;
6152         u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu;
6153         u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
6154                               & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
6155                 SEND_LEN_CHECK1_LEN_VL15_SHIFT;
6156         int i;
6157
6158         for (i = 0; i < ppd->vls_supported; i++) {
6159                 if (dd->vld[i].mtu > maxvlmtu)
6160                         maxvlmtu = dd->vld[i].mtu;
6161                 if (i <= 3)
6162                         len1 |= (((dd->vld[i].mtu + max_hb) >> 2)
6163                                  & SEND_LEN_CHECK0_LEN_VL0_MASK) <<
6164                                 ((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT);
6165                 else
6166                         len2 |= (((dd->vld[i].mtu + max_hb) >> 2)
6167                                  & SEND_LEN_CHECK1_LEN_VL4_MASK) <<
6168                                 ((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT);
6169         }
6170         write_csr(dd, SEND_LEN_CHECK0, len1);
6171         write_csr(dd, SEND_LEN_CHECK1, len2);
6172         /* adjust kernel credit return thresholds based on new MTUs */
6173         /* all kernel receive contexts have the same hdrqentsize */
6174         for (i = 0; i < ppd->vls_supported; i++) {
6175                 sc_set_cr_threshold(dd->vld[i].sc,
6176                         sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
6177                                 dd->rcd[0]->rcvhdrqentsize));
6178         }
6179         sc_set_cr_threshold(dd->vld[15].sc,
6180                 sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
6181                         dd->rcd[0]->rcvhdrqentsize));
6182
6183         /* Adjust maximum MTU for the port in DC */
6184         dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
6185                 (ilog2(maxvlmtu >> 8) + 1);
6186         len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG);
6187         len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK;
6188         len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) <<
6189                 DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT;
6190         write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1);
6191 }
6192
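/*
 * Program the port LID/LMC into the DC port config, the SLID check
 * registers of every send context, and the SDMA engines.
 */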
6193 static void set_lidlmc(struct hfi1_pportdata *ppd)
6194 {
6195         int i;
6196         u64 sreg = 0;
6197         struct hfi1_devdata *dd = ppd->dd;
6198         u32 mask = ~((1U << ppd->lmc) - 1);
6199         u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
6200
6201         if (dd->hfi1_snoop.mode_flag)
6202                 dd_dev_info(dd, "Set lid/lmc while snooping\n");
6203
6204         c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
6205                 | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
6206         c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
6207                         << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
6208               ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
6209                         << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
6210         write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
6211
6212         /*
6213          * Iterate over all the send contexts and set their SLID check
6214          */
6215         sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
6216                         SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
6217                (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
6218                         SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
6219
6220         for (i = 0; i < dd->chip_send_contexts; i++) {
6221                 hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
6222                           i, (u32)sreg);
6223                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
6224         }
6225
6226         /* Now we have to do the same thing for the sdma engines */
6227         sdma_update_lmc(dd, mask, ppd->lid);
6228 }
6229
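/*
 * Wait up to msecs milliseconds for the physical link state to reach
 * the given state.  Returns 0 on success, -ETIMEDOUT on timeout.
 */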
6230 static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
6231 {
6232         unsigned long timeout;
6233         u32 curr_state;
6234
6235         timeout = jiffies + msecs_to_jiffies(msecs);
6236         while (1) {
6237                 curr_state = read_physical_state(dd);
6238                 if (curr_state == state)
6239                         break;
6240                 if (time_after(jiffies, timeout)) {
6241                         dd_dev_err(dd,
6242                                 "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
6243                                 state, curr_state);
6244                         return -ETIMEDOUT;
6245                 }
6246                 usleep_range(1950, 2050); /* sleep 2ms-ish */
6247         }
6248
6249         return 0;
6250 }
6251
6252 /*
6253  * Helper for set_link_state().  Do not call except from that routine.
6254  * Expects ppd->hls_mutex to be held.
6255  *
6256  * @rem_reason value to be sent to the neighbor
6257  *
6258  * LinkDownReasons only set if transition succeeds.
6259  */
6260 static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
6261 {
6262         struct hfi1_devdata *dd = ppd->dd;
6263         u32 pstate, previous_state;
6264         u32 last_local_state;
6265         u32 last_remote_state;
6266         int ret;
6267         int do_transition;
6268         int do_wait;
6269
6270         previous_state = ppd->host_link_state;
6271         ppd->host_link_state = HLS_GOING_OFFLINE;
6272         pstate = read_physical_state(dd);
6273         if (pstate == PLS_OFFLINE) {
6274                 do_transition = 0;      /* in right state */
6275                 do_wait = 0;            /* ...no need to wait */
6276         } else if ((pstate & 0xff) == PLS_OFFLINE) {
6277                 do_transition = 0;      /* in an offline transient state */
6278                 do_wait = 1;            /* ...wait for it to settle */
6279         } else {
6280                 do_transition = 1;      /* need to move to offline */
6281                 do_wait = 1;            /* ...will need to wait */
6282         }
6283
6284         if (do_transition) {
6285                 ret = set_physical_link_state(dd,
6286                         PLS_OFFLINE | (rem_reason << 8));
6287
6288                 if (ret != HCMD_SUCCESS) {
6289                         dd_dev_err(dd,
6290                                 "Failed to transition to Offline link state, return %d\n",
6291                                 ret);
6292                         return -EINVAL;
6293                 }
6294                 if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
6295                         ppd->offline_disabled_reason =
6296                         OPA_LINKDOWN_REASON_TRANSIENT;
6297         }
6298
6299         if (do_wait) {
6300                 /* it can take a while for the link to go down */
6301                 ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
6302                 if (ret < 0)
6303                         return ret;
6304         }
6305
6306         /* make sure the logical state is also down */
6307         wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
6308
6309         /*
6310          * Now in charge of LCB - must be after the physical state is
6311          * offline.quiet and before host_link_state is changed.
6312          */
6313         set_host_lcb_access(dd);
6314         write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
6315         ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
6316
6317         /*
6318          * The LNI has a mandatory wait time after the physical state
6319          * moves to Offline.Quiet.  The wait time may be different
6320          * depending on how the link went down.  The 8051 firmware
6321          * will observe the needed wait time and only move to ready
6322          * when that is completed.  The largest of the quiet timeouts
6323          * is 2.5s, so wait that long and then a bit more.
6324          */
6325         ret = wait_fm_ready(dd, 3000);
6326         if (ret) {
6327                 dd_dev_err(dd,
6328                         "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
6329                 /* state is really offline, so make it so */
6330                 ppd->host_link_state = HLS_DN_OFFLINE;
6331                 return ret;
6332         }
6333
6334         /*
6335          * The state is now offline and the 8051 is ready to accept host
6336          * requests.
6337          *      - change our state
6338          *      - notify others if we were previously in a linkup state
6339          */
6340         ppd->host_link_state = HLS_DN_OFFLINE;
6341         if (previous_state & HLS_UP) {
6342                 /* went down while link was up */
6343                 handle_linkup_change(dd, 0);
6344         } else if (previous_state
6345                         & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
6346                 /* went down while attempting link up */
6347                 /* byte 1 of last_*_state is the failure reason */
6348                 read_last_local_state(dd, &last_local_state);
6349                 read_last_remote_state(dd, &last_remote_state);
6350                 dd_dev_err(dd,
6351                         "LNI failure last states: local 0x%08x, remote 0x%08x\n",
6352                         last_local_state, last_remote_state);
6353         }
6354
6355         /* the active link width (downgrade) is 0 on link down */
6356         ppd->link_width_active = 0;
6357         ppd->link_width_downgrade_tx_active = 0;
6358         ppd->link_width_downgrade_rx_active = 0;
6359         ppd->current_egress_rate = 0;
6360         return 0;
6361 }
6362
6363 /* return the link state name */
6364 static const char *link_state_name(u32 state)
6365 {
6366         const char *name;
6367         int n = ilog2(state);
6368         static const char * const names[] = {
6369                 [__HLS_UP_INIT_BP]       = "INIT",
6370                 [__HLS_UP_ARMED_BP]      = "ARMED",
6371                 [__HLS_UP_ACTIVE_BP]     = "ACTIVE",
6372                 [__HLS_DN_DOWNDEF_BP]    = "DOWNDEF",
6373                 [__HLS_DN_POLL_BP]       = "POLL",
6374                 [__HLS_DN_DISABLE_BP]    = "DISABLE",
6375                 [__HLS_DN_OFFLINE_BP]    = "OFFLINE",
6376                 [__HLS_VERIFY_CAP_BP]    = "VERIFY_CAP",
6377                 [__HLS_GOING_UP_BP]      = "GOING_UP",
6378                 [__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
6379                 [__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
6380         };
6381
6382         name = n < ARRAY_SIZE(names) ? names[n] : NULL;
6383         return name ? name : "unknown";
6384 }
6385
6386 /* return the link state reason name */
6387 static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
6388 {
6389         if (state == HLS_UP_INIT) {
6390                 switch (ppd->linkinit_reason) {
6391                 case OPA_LINKINIT_REASON_LINKUP:
6392                         return "(LINKUP)";
6393                 case OPA_LINKINIT_REASON_FLAPPING:
6394                         return "(FLAPPING)";
6395                 case OPA_LINKINIT_OUTSIDE_POLICY:
6396                         return "(OUTSIDE_POLICY)";
6397                 case OPA_LINKINIT_QUARANTINED:
6398                         return "(QUARANTINED)";
6399                 case OPA_LINKINIT_INSUFIC_CAPABILITY:
6400                         return "(INSUFIC_CAPABILITY)";
6401                 default:
6402                         break;
6403                 }
6404         }
6405         return "";
6406 }
6407
6408 /*
6409  * driver_physical_state - convert the driver's notion of a port's
6410  * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
6411  * Return -1 (converted to a u32) to indicate error.
6412  */
6413 u32 driver_physical_state(struct hfi1_pportdata *ppd)
6414 {
6415         switch (ppd->host_link_state) {
6416         case HLS_UP_INIT:
6417         case HLS_UP_ARMED:
6418         case HLS_UP_ACTIVE:
6419                 return IB_PORTPHYSSTATE_LINKUP;
6420         case HLS_DN_POLL:
6421                 return IB_PORTPHYSSTATE_POLLING;
6422         case HLS_DN_DISABLE:
6423                 return IB_PORTPHYSSTATE_DISABLED;
6424         case HLS_DN_OFFLINE:
6425                 return OPA_PORTPHYSSTATE_OFFLINE;
6426         case HLS_VERIFY_CAP:
6427                 return IB_PORTPHYSSTATE_POLLING;
6428         case HLS_GOING_UP:
6429                 return IB_PORTPHYSSTATE_POLLING;
6430         case HLS_GOING_OFFLINE:
6431                 return OPA_PORTPHYSSTATE_OFFLINE;
6432         case HLS_LINK_COOLDOWN:
6433                 return OPA_PORTPHYSSTATE_OFFLINE;
6434         case HLS_DN_DOWNDEF:
6435         default:
6436                 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6437                            ppd->host_link_state);
6438                 return -1;
6439         }
6440 }
6441
6442 /*
6443  * driver_logical_state - convert the driver's notion of a port's
6444  * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
6445  * (converted to a u32) to indicate error.
6446  */
6447 u32 driver_logical_state(struct hfi1_pportdata *ppd)
6448 {
6449         if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
6450                 return IB_PORT_DOWN;
6451
6452         switch (ppd->host_link_state & HLS_UP) {
6453         case HLS_UP_INIT:
6454                 return IB_PORT_INIT;
6455         case HLS_UP_ARMED:
6456                 return IB_PORT_ARMED;
6457         case HLS_UP_ACTIVE:
6458                 return IB_PORT_ACTIVE;
6459         default:
6460                 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6461                            ppd->host_link_state);
6462                 return -1;
6463         }
6464 }
6465
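/*
 * Record the local, neighbor, and remote link down reasons, but only if
 * no reasons are currently recorded.
 */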
6466 void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
6467                           u8 neigh_reason, u8 rem_reason)
6468 {
6469         if (ppd->local_link_down_reason.latest == 0 &&
6470             ppd->neigh_link_down_reason.latest == 0) {
6471                 ppd->local_link_down_reason.latest = lcl_reason;
6472                 ppd->neigh_link_down_reason.latest = neigh_reason;
6473                 ppd->remote_link_down_reason = rem_reason;
6474         }
6475 }
6476
6477 /*
6478  * Change the physical and/or logical link state.
6479  *
6480  * Do not call this routine while inside an interrupt.  It contains
6481  * calls to routines that can take multiple seconds to finish.
6482  *
6483  * Returns 0 on success, -errno on failure.
6484  */
6485 int set_link_state(struct hfi1_pportdata *ppd, u32 state)
6486 {
6487         struct hfi1_devdata *dd = ppd->dd;
6488         struct ib_event event = {.device = NULL};
6489         int ret1, ret = 0;
6490         int was_up, is_down;
6491         int orig_new_state, poll_bounce;
6492
6493         mutex_lock(&ppd->hls_lock);
6494
6495         orig_new_state = state;
6496         if (state == HLS_DN_DOWNDEF)
6497                 state = dd->link_default;
6498
6499         /* interpret poll -> poll as a link bounce */
6500         poll_bounce = ppd->host_link_state == HLS_DN_POLL
6501                                 && state == HLS_DN_POLL;
6502
6503         dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
6504                 link_state_name(ppd->host_link_state),
6505                 link_state_name(orig_new_state),
6506                 poll_bounce ? "(bounce) " : "",
6507                 link_state_reason_name(ppd, state));
6508
6509         was_up = !!(ppd->host_link_state & HLS_UP);
6510
6511         /*
6512          * If we're going to a (HLS_*) link state that implies the logical
6513          * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
6514          * reset is_sm_config_started to 0.
6515          */
6516         if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
6517                 ppd->is_sm_config_started = 0;
6518
6519         /*
6520          * Do nothing if the states match.  Let a poll to poll link bounce
6521          * Do nothing if the states match.  Let a poll-to-poll link bounce
6522          */
6523         if (ppd->host_link_state == state && !poll_bounce)
6524                 goto done;
6525
6526         switch (state) {
6527         case HLS_UP_INIT:
6528                 if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
6529                             || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
6530                         /*
6531                          * Quick link up jumps from polling to here.
6532                          *
6533                          * Whether in normal or loopback mode, the
6534                          * simulator jumps from polling to link up.
6535                          * Accept that here.
6536                          */
6537                         /* OK */;
6538                 } else if (ppd->host_link_state != HLS_GOING_UP) {
6539                         goto unexpected;
6540                 }
6541
6542                 ppd->host_link_state = HLS_UP_INIT;
6543                 ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
6544                 if (ret) {
6545                         /* logical state didn't change, stay at going_up */
6546                         ppd->host_link_state = HLS_GOING_UP;
6547                         dd_dev_err(dd,
6548                                 "%s: logical state did not change to INIT\n",
6549                                 __func__);
6550                 } else {
6551                         /* clear old transient LINKINIT_REASON code */
6552                         if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
6553                                 ppd->linkinit_reason =
6554                                         OPA_LINKINIT_REASON_LINKUP;
6555
6556                         /* enable the port */
6557                         add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
6558
6559                         handle_linkup_change(dd, 1);
6560                 }
6561                 break;
6562         case HLS_UP_ARMED:
6563                 if (ppd->host_link_state != HLS_UP_INIT)
6564                         goto unexpected;
6565
6566                 ppd->host_link_state = HLS_UP_ARMED;
6567                 set_logical_state(dd, LSTATE_ARMED);
6568                 ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
6569                 if (ret) {
6570                         /* logical state didn't change, stay at init */
6571                         ppd->host_link_state = HLS_UP_INIT;
6572                         dd_dev_err(dd,
6573                                 "%s: logical state did not change to ARMED\n",
6574                                 __func__);
6575                 }
6576                 /*
6577                  * The simulator does not currently implement SMA messages,
6578                  * so neighbor_normal is not set.  Set it here when we first
6579                  * move to Armed.
6580                  */
6581                 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
6582                         ppd->neighbor_normal = 1;
6583                 break;
6584         case HLS_UP_ACTIVE:
6585                 if (ppd->host_link_state != HLS_UP_ARMED)
6586                         goto unexpected;
6587
6588                 ppd->host_link_state = HLS_UP_ACTIVE;
6589                 set_logical_state(dd, LSTATE_ACTIVE);
6590                 ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
6591                 if (ret) {
6592                         /* logical state didn't change, stay at armed */
6593                         ppd->host_link_state = HLS_UP_ARMED;
6594                         dd_dev_err(dd,
6595                                 "%s: logical state did not change to ACTIVE\n",
6596                                 __func__);
6597                 } else {
6598
6599                         /* tell all engines to go running */
6600                         sdma_all_running(dd);
6601
6602                         /* Signal the IB layer that the port has gone active */
6603                         event.device = &dd->verbs_dev.ibdev;
6604                         event.element.port_num = ppd->port;
6605                         event.event = IB_EVENT_PORT_ACTIVE;
6606                 }
6607                 break;
6608         case HLS_DN_POLL:
6609                 if ((ppd->host_link_state == HLS_DN_DISABLE ||
6610                      ppd->host_link_state == HLS_DN_OFFLINE) &&
6611                     dd->dc_shutdown)
6612                         dc_start(dd);
6613                 /* Hand LED control to the DC */
6614                 write_csr(dd, DCC_CFG_LED_CNTRL, 0);
6615
6616                 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6617                         u8 tmp = ppd->link_enabled;
6618
6619                         ret = goto_offline(ppd, ppd->remote_link_down_reason);
6620                         if (ret) {
6621                                 ppd->link_enabled = tmp;
6622                                 break;
6623                         }
6624                         ppd->remote_link_down_reason = 0;
6625
6626                         if (ppd->driver_link_ready)
6627                                 ppd->link_enabled = 1;
6628                 }
6629
6630                 ret = set_local_link_attributes(ppd);
6631                 if (ret)
6632                         break;
6633
6634                 ppd->port_error_action = 0;
6635                 ppd->host_link_state = HLS_DN_POLL;
6636
6637                 if (quick_linkup) {
6638                         /* quick linkup does not go into polling */
6639                         ret = do_quick_linkup(dd);
6640                 } else {
6641                         ret1 = set_physical_link_state(dd, PLS_POLLING);
6642                         if (ret1 != HCMD_SUCCESS) {
6643                                 dd_dev_err(dd,
6644                                         "Failed to transition to Polling link state, return 0x%x\n",
6645                                         ret1);
6646                                 ret = -EINVAL;
6647                         }
6648                 }
6649                 ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
6650                 /*
6651                  * If an error occurred above, go back to offline.  The
6652                  * caller may reschedule another attempt.
6653                  */
6654                 if (ret)
6655                         goto_offline(ppd, 0);
6656                 break;
6657         case HLS_DN_DISABLE:
6658                 /* link is disabled */
6659                 ppd->link_enabled = 0;
6660
6661                 /* allow any state to transition to disabled */
6662
6663                 /* must transition to offline first */
6664                 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6665                         ret = goto_offline(ppd, ppd->remote_link_down_reason);
6666                         if (ret)
6667                                 break;
6668                         ppd->remote_link_down_reason = 0;
6669                 }
6670
6671                 ret1 = set_physical_link_state(dd, PLS_DISABLED);
6672                 if (ret1 != HCMD_SUCCESS) {
6673                         dd_dev_err(dd,
6674                                 "Failed to transition to Disabled link state, return 0x%x\n",
6675                                 ret1);
6676                         ret = -EINVAL;
6677                         break;
6678                 }
6679                 ppd->host_link_state = HLS_DN_DISABLE;
6680                 dc_shutdown(dd);
6681                 break;
6682         case HLS_DN_OFFLINE:
6683                 if (ppd->host_link_state == HLS_DN_DISABLE)
6684                         dc_start(dd);
6685
6686                 /* allow any state to transition to offline */
6687                 ret = goto_offline(ppd, ppd->remote_link_down_reason);
6688                 if (!ret)
6689                         ppd->remote_link_down_reason = 0;
6690                 break;
6691         case HLS_VERIFY_CAP:
6692                 if (ppd->host_link_state != HLS_DN_POLL)
6693                         goto unexpected;
6694                 ppd->host_link_state = HLS_VERIFY_CAP;
6695                 break;
6696         case HLS_GOING_UP:
6697                 if (ppd->host_link_state != HLS_VERIFY_CAP)
6698                         goto unexpected;
6699
6700                 ret1 = set_physical_link_state(dd, PLS_LINKUP);
6701                 if (ret1 != HCMD_SUCCESS) {
6702                         dd_dev_err(dd,
6703                                 "Failed to transition to link up state, return 0x%x\n",
6704                                 ret1);
6705                         ret = -EINVAL;
6706                         break;
6707                 }
6708                 ppd->host_link_state = HLS_GOING_UP;
6709                 break;
6710
6711         case HLS_GOING_OFFLINE:         /* transient within goto_offline() */
6712         case HLS_LINK_COOLDOWN:         /* transient within goto_offline() */
6713         default:
6714                 dd_dev_info(dd, "%s: state 0x%x: not supported\n",
6715                         __func__, state);
6716                 ret = -EINVAL;
6717                 break;
6718         }
6719
6720         is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
6721                         HLS_DN_DISABLE | HLS_DN_OFFLINE));
6722
6723         if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
6724             ppd->neigh_link_down_reason.sma == 0) {
6725                 ppd->local_link_down_reason.sma =
6726                   ppd->local_link_down_reason.latest;
6727                 ppd->neigh_link_down_reason.sma =
6728                   ppd->neigh_link_down_reason.latest;
6729         }
6730
6731         goto done;
6732
6733 unexpected:
6734         dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
6735                 __func__, link_state_name(ppd->host_link_state),
6736                 link_state_name(state));
6737         ret = -EINVAL;
6738
6739 done:
6740         mutex_unlock(&ppd->hls_lock);
6741
6742         if (event.device)
6743                 ib_dispatch_event(&event);
6744
6745         return ret;
6746 }
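/*
 * Illustrative note (a reading aid, not part of the driver): the switch
 * above encodes which host link state must precede each requested state.
 * From the guards visible here, a successful bring-up requested through
 * this routine progresses roughly as
 *
 *	HLS_DN_POLL -> HLS_VERIFY_CAP -> HLS_GOING_UP -> ... -> HLS_UP_ARMED
 *		-> HLS_UP_ACTIVE
 *
 * Requesting a state whose required predecessor does not match falls
 * through to the "unexpected" label and returns -EINVAL.
 */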
6747
6748 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
6749 {
6750         u64 reg;
6751         int ret = 0;
6752
6753         switch (which) {
6754         case HFI1_IB_CFG_LIDLMC:
6755                 set_lidlmc(ppd);
6756                 break;
6757         case HFI1_IB_CFG_VL_HIGH_LIMIT:
6758                 /*
6759                  * The VL Arbitrator high limit is sent in units of 4k
6760                  * bytes, while HFI stores it in units of 64 bytes.
6761                  */
6762                 val *= 4096/64;
6763                 reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
6764                         << SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
6765                 write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
6766                 break;
6767         case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6768                 /* HFI only supports POLL as the default link down state */
6769                 if (val != HLS_DN_POLL)
6770                         ret = -EINVAL;
6771                 break;
6772         case HFI1_IB_CFG_OP_VLS:
6773                 if (ppd->vls_operational != val) {
6774                         ppd->vls_operational = val;
6775                         if (!ppd->port)
6776                                 ret = -EINVAL;
6777                         else
6778                                 ret = sdma_map_init(
6779                                         ppd->dd,
6780                                         ppd->port - 1,
6781                                         val,
6782                                         NULL);
6783                 }
6784                 break;
6785         /*
6786          * For link width, link width downgrade, and speed enable, always AND
6787          * the setting with what is actually supported.  This has two benefits.
6788          * First, enabled can't have unsupported values, no matter what the
6789          * SM or FM might want.  Second, the ALL_SUPPORTED wildcards that mean
6790          * "fill in with your supported value" have all the bits in the
6791          * field set, so simply ANDing with supported has the desired result.
6792          */
6793         case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
6794                 ppd->link_width_enabled = val & ppd->link_width_supported;
6795                 break;
6796         case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
6797                 ppd->link_width_downgrade_enabled =
6798                                 val & ppd->link_width_downgrade_supported;
6799                 break;
6800         case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6801                 ppd->link_speed_enabled = val & ppd->link_speed_supported;
6802                 break;
6803         case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6804                 /*
6805                  * HFI does not follow IB specs; save this value
6806                  * so we can report it if asked.
6807                  */
6808                 ppd->overrun_threshold = val;
6809                 break;
6810         case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6811                 /*
6812                  * HFI does not follow IB specs; save this value
6813                  * so we can report it if asked.
6814                  */
6815                 ppd->phy_error_threshold = val;
6816                 break;
6817
6818         case HFI1_IB_CFG_MTU:
6819                 set_send_length(ppd);
6820                 break;
6821
6822         case HFI1_IB_CFG_PKEYS:
6823                 if (HFI1_CAP_IS_KSET(PKEY_CHECK))
6824                         set_partition_keys(ppd);
6825                 break;
6826
6827         default:
6828                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6829                         dd_dev_info(ppd->dd,
6830                           "%s: which %s, val 0x%x: not implemented\n",
6831                           __func__, ib_cfg_name(which), val);
6832                 break;
6833         }
6834         return ret;
6835 }
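/*
 * Worked example (illustrative): HFI1_IB_CFG_VL_HIGH_LIMIT arrives in units
 * of 4 KB while the SendHighPriorityLimit CSR counts 64-byte blocks, hence
 * the scale factor of 4096 / 64 = 64 above.  A hypothetical call such as
 *
 *	hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT, 2);
 *
 * would therefore program 128 blocks (8 KB) into the register, subject to
 * SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK.
 */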
6836
6837 /* begin functions related to vl arbitration table caching */
6838 static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
6839 {
6840         int i;
6841
6842         BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6843                         VL_ARB_LOW_PRIO_TABLE_SIZE);
6844         BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6845                         VL_ARB_HIGH_PRIO_TABLE_SIZE);
6846
6847         /*
6848          * Note that we always return values directly from the
6849          * 'vl_arb_cache' (and do no CSR reads) in response to a
6850          * 'Get(VLArbTable)'. This is obviously correct after a
6851          * 'Set(VLArbTable)', since the cache will then be up to
6852          * date. But it's also correct prior to any 'Set(VLArbTable)'
6853                  * since then both the cache and the relevant h/w registers
6854          * will be zeroed.
6855          */
6856
6857         for (i = 0; i < MAX_PRIO_TABLE; i++)
6858                 spin_lock_init(&ppd->vl_arb_cache[i].lock);
6859 }
6860
6861 /*
6862  * vl_arb_lock_cache
6863  *
6864  * All other vl_arb_* functions should be called only after locking
6865  * the cache.
6866  */
6867 static inline struct vl_arb_cache *
6868 vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx)
6869 {
6870         if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE)
6871                 return NULL;
6872         spin_lock(&ppd->vl_arb_cache[idx].lock);
6873         return &ppd->vl_arb_cache[idx];
6874 }
6875
6876 static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx)
6877 {
6878         spin_unlock(&ppd->vl_arb_cache[idx].lock);
6879 }
6880
6881 static void vl_arb_get_cache(struct vl_arb_cache *cache,
6882                              struct ib_vl_weight_elem *vl)
6883 {
6884         memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl));
6885 }
6886
6887 static void vl_arb_set_cache(struct vl_arb_cache *cache,
6888                              struct ib_vl_weight_elem *vl)
6889 {
6890         memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6891 }
6892
6893 static int vl_arb_match_cache(struct vl_arb_cache *cache,
6894                               struct ib_vl_weight_elem *vl)
6895 {
6896         return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6897 }
6898 /* end functions related to vl arbitration table caching */
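/*
 * Usage sketch (illustrative): per the vl_arb_lock_cache() comment, the
 * other vl_arb_* helpers must only run with the cache locked, e.g. to
 * answer a Get(VLArbTable) entirely from the cache:
 *
 *	struct vl_arb_cache *vlc;
 *
 *	vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
 *	vl_arb_get_cache(vlc, table);	(copies out the cached elements)
 *	vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
 *
 * fm_get_table() and fm_set_table() below follow exactly this pattern.
 */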
6899
6900 static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
6901                           u32 size, struct ib_vl_weight_elem *vl)
6902 {
6903         struct hfi1_devdata *dd = ppd->dd;
6904         u64 reg;
6905         unsigned int i, is_up = 0;
6906         int drain, ret = 0;
6907
6908         mutex_lock(&ppd->hls_lock);
6909
6910         if (ppd->host_link_state & HLS_UP)
6911                 is_up = 1;
6912
6913         drain = !is_ax(dd) && is_up;
6914
6915         if (drain)
6916                 /*
6917                  * Before adjusting VL arbitration weights, empty per-VL
6918                  * FIFOs, otherwise a packet whose VL weight is being
6919                  * set to 0 could get stuck in a FIFO with no chance to
6920                  * egress.
6921                  */
6922                 ret = stop_drain_data_vls(dd);
6923
6924         if (ret) {
6925                 dd_dev_err(
6926                         dd,
6927                         "%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n",
6928                         __func__);
6929                 goto err;
6930         }
6931
6932         for (i = 0; i < size; i++, vl++) {
6933                 /*
6934                  * NOTE: The low priority shift and mask are used here, but
6935                  * they are the same for both the low and high registers.
6936                  */
6937                 reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK)
6938                                 << SEND_LOW_PRIORITY_LIST_VL_SHIFT)
6939                       | (((u64)vl->weight
6940                                 & SEND_LOW_PRIORITY_LIST_WEIGHT_MASK)
6941                                 << SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT);
6942                 write_csr(dd, target + (i * 8), reg);
6943         }
6944         pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE);
6945
6946         if (drain)
6947                 open_fill_data_vls(dd); /* reopen all VLs */
6948
6949 err:
6950         mutex_unlock(&ppd->hls_lock);
6951
6952         return ret;
6953 }
6954
6955 /*
6956  * Read one credit merge VL register.
6957  */
6958 static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr,
6959                            struct vl_limit *vll)
6960 {
6961         u64 reg = read_csr(dd, csr);
6962
6963         vll->dedicated = cpu_to_be16(
6964                 (reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT)
6965                 & SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK);
6966         vll->shared = cpu_to_be16(
6967                 (reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT)
6968                 & SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK);
6969 }
6970
6971 /*
6972  * Read the current credit merge limits.
6973  */
6974 static int get_buffer_control(struct hfi1_devdata *dd,
6975                               struct buffer_control *bc, u16 *overall_limit)
6976 {
6977         u64 reg;
6978         int i;
6979
6980         /* not all entries are filled in */
6981         memset(bc, 0, sizeof(*bc));
6982
6983         /* OPA and HFI have a 1-1 mapping */
6984         for (i = 0; i < TXE_NUM_DATA_VL; i++)
6985                 read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
6986
6987         /* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
6988         read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
6989
6990         reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6991         bc->overall_shared_limit = cpu_to_be16(
6992                 (reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
6993                 & SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK);
6994         if (overall_limit)
6995                 *overall_limit = (reg
6996                         >> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
6997                         & SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK;
6998         return sizeof(struct buffer_control);
6999 }
7000
7001 static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7002 {
7003         u64 reg;
7004         int i;
7005
7006         /* each register contains 16 SC->VLnt mappings, 4 bits each */
7007         reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0);
7008         for (i = 0; i < sizeof(u64); i++) {
7009                 u8 byte = *(((u8 *)&reg) + i);
7010
7011                 dp->vlnt[2 * i] = byte & 0xf;
7012                 dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4;
7013         }
7014
7015         reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_31_16);
7016         for (i = 0; i < sizeof(u64); i++) {
7017                 u8 byte = *(((u8 *)&reg) + i);
7018
7019                 dp->vlnt[16 + (2 * i)] = byte & 0xf;
7020                 dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4;
7021         }
7022         return sizeof(struct sc2vlnt);
7023 }
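/*
 * Worked example (illustrative): each 64-bit SC->VLnt register packs 16
 * mappings at 4 bits apiece, unpacked a byte at a time above.  If the low
 * byte of DCC_CFG_SC_VL_TABLE_15_0 were 0x21, the loop would produce
 * dp->vlnt[0] = 0x1 (low nibble, SC0) and dp->vlnt[1] = 0x2 (high nibble,
 * SC1); set_sc2vlnt() below performs the inverse packing via DC_SC_VL_VAL().
 */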
7024
7025 static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
7026                               struct ib_vl_weight_elem *vl)
7027 {
7028         unsigned int i;
7029
7030         for (i = 0; i < nelems; i++, vl++) {
7031                 vl->vl = 0xf;
7032                 vl->weight = 0;
7033         }
7034 }
7035
7036 static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7037 {
7038         write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
7039                 DC_SC_VL_VAL(15_0,
7040                 0, dp->vlnt[0] & 0xf,
7041                 1, dp->vlnt[1] & 0xf,
7042                 2, dp->vlnt[2] & 0xf,
7043                 3, dp->vlnt[3] & 0xf,
7044                 4, dp->vlnt[4] & 0xf,
7045                 5, dp->vlnt[5] & 0xf,
7046                 6, dp->vlnt[6] & 0xf,
7047                 7, dp->vlnt[7] & 0xf,
7048                 8, dp->vlnt[8] & 0xf,
7049                 9, dp->vlnt[9] & 0xf,
7050                 10, dp->vlnt[10] & 0xf,
7051                 11, dp->vlnt[11] & 0xf,
7052                 12, dp->vlnt[12] & 0xf,
7053                 13, dp->vlnt[13] & 0xf,
7054                 14, dp->vlnt[14] & 0xf,
7055                 15, dp->vlnt[15] & 0xf));
7056         write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
7057                 DC_SC_VL_VAL(31_16,
7058                 16, dp->vlnt[16] & 0xf,
7059                 17, dp->vlnt[17] & 0xf,
7060                 18, dp->vlnt[18] & 0xf,
7061                 19, dp->vlnt[19] & 0xf,
7062                 20, dp->vlnt[20] & 0xf,
7063                 21, dp->vlnt[21] & 0xf,
7064                 22, dp->vlnt[22] & 0xf,
7065                 23, dp->vlnt[23] & 0xf,
7066                 24, dp->vlnt[24] & 0xf,
7067                 25, dp->vlnt[25] & 0xf,
7068                 26, dp->vlnt[26] & 0xf,
7069                 27, dp->vlnt[27] & 0xf,
7070                 28, dp->vlnt[28] & 0xf,
7071                 29, dp->vlnt[29] & 0xf,
7072                 30, dp->vlnt[30] & 0xf,
7073                 31, dp->vlnt[31] & 0xf));
7074 }
7075
7076 static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
7077                         u16 limit)
7078 {
7079         if (limit != 0)
7080                 dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
7081                         what, (int)limit, idx);
7082 }
7083
7084 /* change only the shared limit portion of SendCmGlobalCredit */
7085 static void set_global_shared(struct hfi1_devdata *dd, u16 limit)
7086 {
7087         u64 reg;
7088
7089         reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7090         reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK;
7091         reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT;
7092         write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7093 }
7094
7095 /* change only the total credit limit portion of SendCmGlobalCredit */
7096 static void set_global_limit(struct hfi1_devdata *dd, u16 limit)
7097 {
7098         u64 reg;
7099
7100         reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7101         reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK;
7102         reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
7103         write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7104 }
7105
7106 /* set the given per-VL shared limit */
7107 static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit)
7108 {
7109         u64 reg;
7110         u32 addr;
7111
7112         if (vl < TXE_NUM_DATA_VL)
7113                 addr = SEND_CM_CREDIT_VL + (8 * vl);
7114         else
7115                 addr = SEND_CM_CREDIT_VL15;
7116
7117         reg = read_csr(dd, addr);
7118         reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK;
7119         reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT;
7120         write_csr(dd, addr, reg);
7121 }
7122
7123 /* set the given per-VL dedicated limit */
7124 static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit)
7125 {
7126         u64 reg;
7127         u32 addr;
7128
7129         if (vl < TXE_NUM_DATA_VL)
7130                 addr = SEND_CM_CREDIT_VL + (8 * vl);
7131         else
7132                 addr = SEND_CM_CREDIT_VL15;
7133
7134         reg = read_csr(dd, addr);
7135         reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK;
7136         reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT;
7137         write_csr(dd, addr, reg);
7138 }
7139
7140 /* spin until the given per-VL status mask bits clear */
7141 static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
7142                                      const char *which)
7143 {
7144         unsigned long timeout;
7145         u64 reg;
7146
7147         timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT);
7148         while (1) {
7149                 reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask;
7150
7151                 if (reg == 0)
7152                         return; /* success */
7153                 if (time_after(jiffies, timeout))
7154                         break;          /* timed out */
7155                 udelay(1);
7156         }
7157
7158         dd_dev_err(dd,
7159                 "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
7160                 which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
7161         /*
7162          * If this occurs, it is likely there was a credit loss on the link.
7163          * The only recovery from that is a link bounce.
7164          */
7165         dd_dev_err(dd,
7166                 "Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
7167 }
7168
7169 /*
7170  * The number of credits on the VLs may be changed while everything
7171  * is "live", but the following algorithm must be followed due to
7172  * how the hardware is actually implemented.  In particular,
7173  * Return_Credit_Status[] is the only correct status check.
7174  *
7175  * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
7176  *     set Global_Shared_Credit_Limit = 0
7177  *     use_all_vl = 1
7178  * mask0 = all VLs that are changing either dedicated or shared limits
7179  * set Shared_Limit[mask0] = 0
7180  * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
7181  * if (changing any dedicated limit)
7182  *     mask1 = all VLs that are lowering dedicated limits
7183  *     lower Dedicated_Limit[mask1]
7184  *     spin until Return_Credit_Status[mask1] == 0
7185  *     raise Dedicated_Limits
7186  * raise Shared_Limits
7187  * raise Global_Shared_Credit_Limit
7188  *
7189  * lower = if the new limit is lower, set the limit to the new value
7190  * raise = if the new limit is higher than the current value (may be changed
7191  *      earlier in the algorithm), set the new limit to the new value
7192  */
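/*
 * Worked example (illustrative): suppose only VL0 changes, with its
 * dedicated limit dropping from 100 to 50 credits and its shared limit
 * staying the same.  Then mask0 = {VL0}: Shared_Limit[VL0] is first forced
 * to 0 and the code spins on Return_Credit_Status[VL0]; the dedicated limit
 * is then lowered to 50 (mask1 = {VL0}) with another status wait; finally
 * the per-VL shared limit is written back up to its unchanged target value.
 * The global shared limit is left alone since it is not being reduced
 * (ignoring the A0 special case).
 */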
7193 static int set_buffer_control(struct hfi1_devdata *dd,
7194                               struct buffer_control *new_bc)
7195 {
7196         u64 changing_mask, ld_mask, stat_mask;
7197         int change_count;
7198         int i, use_all_mask;
7199         int this_shared_changing;
7200         /*
7201          * A0: the variable any_shared_limit_changing below augments the
7202          * algorithm above.  If A0 support is removed, it can be removed.
7203          */
7204         int any_shared_limit_changing;
7205         struct buffer_control cur_bc;
7206         u8 changing[OPA_MAX_VLS];
7207         u8 lowering_dedicated[OPA_MAX_VLS];
7208         u16 cur_total;
7209         u32 new_total = 0;
7210         const u64 all_mask =
7211         SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
7212          | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
7213          | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
7214          | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
7215          | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
7216          | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
7217          | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
7218          | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
7219          | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;
7220
7221 #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
7222 #define NUM_USABLE_VLS 16       /* look at VL15 and less */
7223
7224
7225         /* find the new total credits, do sanity check on unused VLs */
7226         for (i = 0; i < OPA_MAX_VLS; i++) {
7227                 if (valid_vl(i)) {
7228                         new_total += be16_to_cpu(new_bc->vl[i].dedicated);
7229                         continue;
7230                 }
7231                 nonzero_msg(dd, i, "dedicated",
7232                         be16_to_cpu(new_bc->vl[i].dedicated));
7233                 nonzero_msg(dd, i, "shared",
7234                         be16_to_cpu(new_bc->vl[i].shared));
7235                 new_bc->vl[i].dedicated = 0;
7236                 new_bc->vl[i].shared = 0;
7237         }
7238         new_total += be16_to_cpu(new_bc->overall_shared_limit);
7239         if (new_total > (u32)dd->link_credits)
7240                 return -EINVAL;
7241         /* fetch the current values */
7242         get_buffer_control(dd, &cur_bc, &cur_total);
7243
7244         /*
7245          * Create the masks we will use.
7246          */
7247         memset(changing, 0, sizeof(changing));
7248         memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
7249         /* NOTE: Assumes that the individual VL bits are adjacent and in
7250            increasing order */
7251         stat_mask =
7252                 SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
7253         changing_mask = 0;
7254         ld_mask = 0;
7255         change_count = 0;
7256         any_shared_limit_changing = 0;
7257         for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
7258                 if (!valid_vl(i))
7259                         continue;
7260                 this_shared_changing = new_bc->vl[i].shared
7261                                                 != cur_bc.vl[i].shared;
7262                 if (this_shared_changing)
7263                         any_shared_limit_changing = 1;
7264                 if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
7265                                 || this_shared_changing) {
7266                         changing[i] = 1;
7267                         changing_mask |= stat_mask;
7268                         change_count++;
7269                 }
7270                 if (be16_to_cpu(new_bc->vl[i].dedicated) <
7271                                         be16_to_cpu(cur_bc.vl[i].dedicated)) {
7272                         lowering_dedicated[i] = 1;
7273                         ld_mask |= stat_mask;
7274                 }
7275         }
7276
7277         /* bracket the credit change with a total adjustment */
7278         if (new_total > cur_total)
7279                 set_global_limit(dd, new_total);
7280
7281         /*
7282          * Start the credit change algorithm.
7283          */
7284         use_all_mask = 0;
7285         if ((be16_to_cpu(new_bc->overall_shared_limit) <
7286                                 be16_to_cpu(cur_bc.overall_shared_limit))
7287                         || (is_a0(dd) && any_shared_limit_changing)) {
7288                 set_global_shared(dd, 0);
7289                 cur_bc.overall_shared_limit = 0;
7290                 use_all_mask = 1;
7291         }
7292
7293         for (i = 0; i < NUM_USABLE_VLS; i++) {
7294                 if (!valid_vl(i))
7295                         continue;
7296
7297                 if (changing[i]) {
7298                         set_vl_shared(dd, i, 0);
7299                         cur_bc.vl[i].shared = 0;
7300                 }
7301         }
7302
7303         wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
7304                 "shared");
7305
7306         if (change_count > 0) {
7307                 for (i = 0; i < NUM_USABLE_VLS; i++) {
7308                         if (!valid_vl(i))
7309                                 continue;
7310
7311                         if (lowering_dedicated[i]) {
7312                                 set_vl_dedicated(dd, i,
7313                                         be16_to_cpu(new_bc->vl[i].dedicated));
7314                                 cur_bc.vl[i].dedicated =
7315                                                 new_bc->vl[i].dedicated;
7316                         }
7317                 }
7318
7319                 wait_for_vl_status_clear(dd, ld_mask, "dedicated");
7320
7321                 /* now raise all dedicated that are going up */
7322                 for (i = 0; i < NUM_USABLE_VLS; i++) {
7323                         if (!valid_vl(i))
7324                                 continue;
7325
7326                         if (be16_to_cpu(new_bc->vl[i].dedicated) >
7327                                         be16_to_cpu(cur_bc.vl[i].dedicated))
7328                                 set_vl_dedicated(dd, i,
7329                                         be16_to_cpu(new_bc->vl[i].dedicated));
7330                 }
7331         }
7332
7333         /* next raise all shared that are going up */
7334         for (i = 0; i < NUM_USABLE_VLS; i++) {
7335                 if (!valid_vl(i))
7336                         continue;
7337
7338                 if (be16_to_cpu(new_bc->vl[i].shared) >
7339                                 be16_to_cpu(cur_bc.vl[i].shared))
7340                         set_vl_shared(dd, i, be16_to_cpu(new_bc->vl[i].shared));
7341         }
7342
7343         /* finally raise the global shared */
7344         if (be16_to_cpu(new_bc->overall_shared_limit) >
7345                         be16_to_cpu(cur_bc.overall_shared_limit))
7346                 set_global_shared(dd,
7347                         be16_to_cpu(new_bc->overall_shared_limit));
7348
7349         /* bracket the credit change with a total adjustment */
7350         if (new_total < cur_total)
7351                 set_global_limit(dd, new_total);
7352         return 0;
7353 }
7354
7355 /*
7356  * Read the given fabric manager table. Return the size of the
7357  * table (in bytes) on success, and a negative error code on
7358  * failure.
7359  */
7360 int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)
7362 {
7363         int size;
7364         struct vl_arb_cache *vlc;
7365
7366         switch (which) {
7367         case FM_TBL_VL_HIGH_ARB:
7368                 size = 256;
7369                 /*
7370                  * OPA specifies 128 elements (of 2 bytes each), though
7371                  * HFI supports only 16 elements in h/w.
7372                  */
7373                 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7374                 vl_arb_get_cache(vlc, t);
7375                 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7376                 break;
7377         case FM_TBL_VL_LOW_ARB:
7378                 size = 256;
7379                 /*
7380                  * OPA specifies 128 elements (of 2 bytes each), though
7381                  * HFI supports only 16 elements in h/w.
7382                  */
7383                 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7384                 vl_arb_get_cache(vlc, t);
7385                 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7386                 break;
7387         case FM_TBL_BUFFER_CONTROL:
7388                 size = get_buffer_control(ppd->dd, t, NULL);
7389                 break;
7390         case FM_TBL_SC2VLNT:
7391                 size = get_sc2vlnt(ppd->dd, t);
7392                 break;
7393         case FM_TBL_VL_PREEMPT_ELEMS:
7394                 size = 256;
7395                 /* OPA specifies 128 elements, of 2 bytes each */
7396                 get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
7397                 break;
7398         case FM_TBL_VL_PREEMPT_MATRIX:
7399                 size = 256;
7400                 /*
7401                  * OPA specifies that this is the same size as the VL
7402                  * arbitration tables (i.e., 256 bytes).
7403                  */
7404                 break;
7405         default:
7406                 return -EINVAL;
7407         }
7408         return size;
7409 }
7410
7411 /*
7412  * Write the given fabric manager table.
7413  */
7414 int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
7415 {
7416         int ret = 0;
7417         struct vl_arb_cache *vlc;
7418
7419         switch (which) {
7420         case FM_TBL_VL_HIGH_ARB:
7421                 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7422                 if (vl_arb_match_cache(vlc, t)) {
7423                         vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7424                         break;
7425                 }
7426                 vl_arb_set_cache(vlc, t);
7427                 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7428                 ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
7429                                      VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
7430                 break;
7431         case FM_TBL_VL_LOW_ARB:
7432                 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7433                 if (vl_arb_match_cache(vlc, t)) {
7434                         vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7435                         break;
7436                 }
7437                 vl_arb_set_cache(vlc, t);
7438                 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7439                 ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
7440                                      VL_ARB_LOW_PRIO_TABLE_SIZE, t);
7441                 break;
7442         case FM_TBL_BUFFER_CONTROL:
7443                 ret = set_buffer_control(ppd->dd, t);
7444                 break;
7445         case FM_TBL_SC2VLNT:
7446                 set_sc2vlnt(ppd->dd, t);
7447                 break;
7448         default:
7449                 ret = -EINVAL;
7450         }
7451         return ret;
7452 }
7453
7454 /*
7455  * Disable all data VLs.
7456  *
7457  * Return 0 if disabled, non-zero if the VLs cannot be disabled.
7458  */
7459 static int disable_data_vls(struct hfi1_devdata *dd)
7460 {
7461         if (is_a0(dd))
7462                 return 1;
7463
7464         pio_send_control(dd, PSC_DATA_VL_DISABLE);
7465
7466         return 0;
7467 }
7468
7469 /*
7470  * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
7471  * Just re-enables all data VLs (the "fill" part happens
7472  * automatically - the name was chosen for symmetry with
7473  * stop_drain_data_vls()).
7474  *
7475  * Return 0 if successful, non-zero if the VLs cannot be enabled.
7476  */
7477 int open_fill_data_vls(struct hfi1_devdata *dd)
7478 {
7479         if (is_a0(dd))
7480                 return 1;
7481
7482         pio_send_control(dd, PSC_DATA_VL_ENABLE);
7483
7484         return 0;
7485 }
7486
7487 /*
7488  * drain_data_vls() - assumes that disable_data_vls() has been called,
7489  * and waits for the occupancy (of per-VL FIFOs) of all contexts and SDMA
7490  * engines to drop to 0.
7491  */
7492 static void drain_data_vls(struct hfi1_devdata *dd)
7493 {
7494         sc_wait(dd);
7495         sdma_wait(dd);
7496         pause_for_credit_return(dd);
7497 }
7498
7499 /*
7500  * stop_drain_data_vls() - disable, then drain all per-VL fifos.
7501  *
7502  * Use open_fill_data_vls() to resume using data VLs.  This pair is
7503  * meant to be used like this:
7504  *
7505  * stop_drain_data_vls(dd);
7506  * // do things with per-VL resources
7507  * open_fill_data_vls(dd);
7508  */
7509 int stop_drain_data_vls(struct hfi1_devdata *dd)
7510 {
7511         int ret;
7512
7513         ret = disable_data_vls(dd);
7514         if (ret == 0)
7515                 drain_data_vls(dd);
7516
7517         return ret;
7518 }
7519
7520 /*
7521  * Convert a nanosecond time to a cclock count.  No matter how slow
7522  * the cclock, a non-zero ns will always have a non-zero result.
7523  */
7524 u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns)
7525 {
7526         u32 cclocks;
7527
7528         if (dd->icode == ICODE_FPGA_EMULATION)
7529                 cclocks = (ns * 1000) / FPGA_CCLOCK_PS;
7530         else  /* simulation pretends to be ASIC */
7531                 cclocks = (ns * 1000) / ASIC_CCLOCK_PS;
7532         if (ns && !cclocks)     /* if ns nonzero, must be at least 1 */
7533                 cclocks = 1;
7534         return cclocks;
7535 }
7536
7537 /*
7538  * Convert a cclock count to nanoseconds.  No matter how slow
7539  * the cclock, a non-zero cclocks will always have a non-zero result.
7540  */
7541 u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks)
7542 {
7543         u32 ns;
7544
7545         if (dd->icode == ICODE_FPGA_EMULATION)
7546                 ns = (cclocks * FPGA_CCLOCK_PS) / 1000;
7547         else  /* simulation pretends to be ASIC */
7548                 ns = (cclocks * ASIC_CCLOCK_PS) / 1000;
7549         if (cclocks && !ns)
7550                 ns = 1;
7551         return ns;
7552 }
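/*
 * Worked example (illustrative; the period is assumed only for the
 * arithmetic): if the cclock period were 1000 ps, ns_to_cclock(dd, 5)
 * would compute (5 * 1000) / 1000 = 5 cclocks and cclock_to_ns(dd, 5)
 * would return 5 ns.  The trailing "nonzero input gives at least 1" check
 * in both helpers keeps a small request from rounding down to zero.
 */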
7553
7554 /*
7555  * Dynamically adjust the receive interrupt timeout for a context based on
7556  * incoming packet rate.
7557  *
7558  * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero.
7559  */
7560 static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
7561 {
7562         struct hfi1_devdata *dd = rcd->dd;
7563         u32 timeout = rcd->rcvavail_timeout;
7564
7565         /*
7566          * This algorithm doubles or halves the timeout depending on whether
7567          * the number of packets received in this interrupt was less than or
7568          * greater than or equal to the interrupt count.
7569          *
7570          * The calculations below do not allow a steady state to be achieved.
7571          * Only at the endpoints is it possible to have an unchanging
7572          * timeout.
7573          */
7574         if (npkts < rcv_intr_count) {
7575                 /*
7576                  * Not enough packets arrived before the timeout, adjust
7577                  * timeout downward.
7578                  */
7579                 if (timeout < 2) /* already at minimum? */
7580                         return;
7581                 timeout >>= 1;
7582         } else {
7583                 /*
7584                  * More than enough packets arrived before the timeout, adjust
7585                  * timeout upward.
7586                  */
7587                 if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
7588                         return;
7589                 timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
7590         }
7591
7592         rcd->rcvavail_timeout = timeout;
7593         /* timeout cannot be larger than rcv_intr_timeout_csr which has already
7594            been verified to be in range */
7595         write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
7596                 (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7597 }
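/*
 * Worked example (illustrative): with a current rcvavail_timeout of 840, an
 * interrupt that carried fewer packets than the rcv_intr_count threshold
 * halves the timeout to 420, while a busier interrupt doubles it toward
 * (but never beyond) dd->rcv_intr_timeout_csr.  Repeated quiet interrupts
 * bottom out at a timeout of 1, matching the "endpoints only" steady state
 * noted above.
 */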
7598
7599 void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
7600                     u32 intr_adjust, u32 npkts)
7601 {
7602         struct hfi1_devdata *dd = rcd->dd;
7603         u64 reg;
7604         u32 ctxt = rcd->ctxt;
7605
7606         /*
7607          * Need to write timeout register before updating RcvHdrHead to ensure
7608          * that a new value is used when the HW decides to restart counting.
7609          */
7610         if (intr_adjust)
7611                 adjust_rcv_timeout(rcd, npkts);
7612         if (updegr) {
7613                 reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
7614                         << RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
7615                 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
7616         }
7617         mmiowb();
7618         reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
7619                 (((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
7620                         << RCV_HDR_HEAD_HEAD_SHIFT);
7621         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7622         mmiowb();
7623 }
7624
7625 u32 hdrqempty(struct hfi1_ctxtdata *rcd)
7626 {
7627         u32 head, tail;
7628
7629         head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
7630                 & RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
7631
7632         if (rcd->rcvhdrtail_kvaddr)
7633                 tail = get_rcvhdrtail(rcd);
7634         else
7635                 tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
7636
7637         return head == tail;
7638 }
7639
7640 /*
7641  * Context Control and Receive Array encoding for buffer size:
7642  *      0x0 invalid
7643  *      0x1   4 KB
7644  *      0x2   8 KB
7645  *      0x3  16 KB
7646  *      0x4  32 KB
7647  *      0x5  64 KB
7648  *      0x6 128 KB
7649  *      0x7 256 KB
7650  *      0x8 512 KB (Receive Array only)
7651  *      0x9   1 MB (Receive Array only)
7652  *      0xa   2 MB (Receive Array only)
7653  *
7654  *      0xB-0xF - reserved (Receive Array only)
7655  *
7656  *
7657  * This routine assumes that the value has already been sanity checked.
7658  */
7659 static u32 encoded_size(u32 size)
7660 {
7661         switch (size) {
7662         case   4*1024: return 0x1;
7663         case   8*1024: return 0x2;
7664         case  16*1024: return 0x3;
7665         case  32*1024: return 0x4;
7666         case  64*1024: return 0x5;
7667         case 128*1024: return 0x6;
7668         case 256*1024: return 0x7;
7669         case 512*1024: return 0x8;
7670         case   1*1024*1024: return 0x9;
7671         case   2*1024*1024: return 0xa;
7672         }
7673         return 0x1;     /* if invalid, go with the minimum size */
7674 }
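/*
 * Usage sketch (illustrative): encoded_size(64 * 1024) returns 0x5, which
 * hfi1_rcvctrl() below shifts into the EgrBufSize field of RcvCtxtCtrl; an
 * unexpected size falls back to 0x1 (4 KB), relying on the earlier sanity
 * checking mentioned in the comment above.
 */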
7675
7676 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
7677 {
7678         struct hfi1_ctxtdata *rcd;
7679         u64 rcvctrl, reg;
7680         int did_enable = 0;
7681
7682         rcd = dd->rcd[ctxt];
7683         if (!rcd)
7684                 return;
7685
7686         hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
7687
7688         rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
7689         /* if the context already enabled, don't do the extra steps */
7690         if ((op & HFI1_RCVCTRL_CTXT_ENB)
7691                         && !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
7692                 /* reset the tail and hdr addresses, and sequence count */
7693                 write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
7694                                 rcd->rcvhdrq_phys);
7695                 if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
7696                         write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7697                                         rcd->rcvhdrqtailaddr_phys);
7698                 rcd->seq_cnt = 1;
7699
7700                 /* reset the cached receive header queue head value */
7701                 rcd->head = 0;
7702
7703                 /*
7704                  * Zero the receive header queue so we don't get false
7705                  * positives when checking the sequence number.  The
7706                  * sequence numbers could land exactly on the same spot.
7707                  * E.g. a rcd restart before the receive header wrapped.
7708                  */
7709                 memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
7710
7711                 /* starting timeout */
7712                 rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
7713
7714                 /* enable the context */
7715                 rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;
7716
7717                 /* clean the egr buffer size first */
7718                 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7719                 rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
7720                                 & RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
7721                                         << RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;
7722
7723                 /* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
7724                 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
7725                 did_enable = 1;
7726
7727                 /* zero RcvEgrIndexHead */
7728                 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);
7729
7730                 /* set eager count and base index */
7731                 reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
7732                         & RCV_EGR_CTRL_EGR_CNT_MASK)
7733                        << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
7734                         (((rcd->eager_base >> RCV_SHIFT)
7735                           & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
7736                          << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
7737                 write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);
7738
7739                 /*
7740                  * Set TID (expected) count and base index.
7741                  * rcd->expected_count is set to individual RcvArray entries,
7742                  * not pairs, and the CSR takes a pair-count in groups of
7743                  * four, so divide by 8.
7744                  */
7745                 reg = (((rcd->expected_count >> RCV_SHIFT)
7746                                         & RCV_TID_CTRL_TID_PAIR_CNT_MASK)
7747                                 << RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
7748                       (((rcd->expected_base >> RCV_SHIFT)
7749                                         & RCV_TID_CTRL_TID_BASE_INDEX_MASK)
7750                                 << RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
7751                 write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
7752                 if (ctxt == VL15CTXT)
7753                         write_csr(dd, RCV_VL15, VL15CTXT);
7754         }
7755         if (op & HFI1_RCVCTRL_CTXT_DIS) {
7756                 write_csr(dd, RCV_VL15, 0);
7757                 rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
7758         }
7759         if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
7760                 rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7761         if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
7762                 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7763         if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
7764                 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7765         if (op & HFI1_RCVCTRL_TAILUPD_DIS)
7766                 rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7767         if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
7768                 rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7769         if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
7770                 rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7771         if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
7772                 /* In one-packet-per-eager mode, the size comes from
7773                    the RcvArray entry. */
7774                 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7775                 rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7776         }
7777         if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
7778                 rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7779         if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
7780                 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7781         if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
7782                 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7783         if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
7784                 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7785         if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
7786                 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7787         rcd->rcvctrl = rcvctrl;
7788         hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
7789         write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
7790
7791         /* work around sticky RcvCtxtStatus.BlockedRHQFull */
7792         if (did_enable
7793             && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
7794                 reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7795                 if (reg != 0) {
7796                         dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
7797                                 ctxt, reg);
7798                         read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7799                         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
7800                         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
7801                         read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7802                         reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7803                         dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
7804                                 ctxt, reg, reg == 0 ? "not" : "still");
7805                 }
7806         }
7807
7808         if (did_enable) {
7809                 /*
7810                  * The interrupt timeout and count must be set after
7811                  * the context is enabled to take effect.
7812                  */
7813                 /* set interrupt timeout */
7814                 write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
7815                         (u64)rcd->rcvavail_timeout <<
7816                                 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7817
7818                 /* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
7819                 reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
7820                 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7821         }
7822
7823         if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
7824                 /*
7825                  * If the context has been disabled and the Tail Update has
7826                  * been cleared, clear the RCV_HDR_TAIL_ADDR CSR so
7827                  * it doesn't contain an address that is invalid.
7828                  */
7829                 write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, 0);
7830 }
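/*
 * Usage sketch (illustrative, hypothetical caller): the op argument is a
 * bitwise OR of HFI1_RCVCTRL_* operations, so a context bring-up might
 * combine flags as in
 *
 *	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB,
 *		     rcd->ctxt);
 *
 * letting the enable-time initialization above (header queue reset,
 * eager/TID programming, interrupt timeout) and the interrupt-available
 * bit land in a single RcvCtxtCtrl write.
 */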
7831
7832 u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
7833                     u64 **cntrp)
7834 {
7835         int ret;
7836         u64 val = 0;
7837
7838         if (namep) {
7839                 ret = dd->cntrnameslen;
7840                 if (pos != 0) {
7841                         dd_dev_err(dd, "read_cntrs does not support indexing");
7842                         return 0;
7843                 }
7844                 *namep = dd->cntrnames;
7845         } else {
7846                 const struct cntr_entry *entry;
7847                 int i, j;
7848
7849                 ret = (dd->ndevcntrs) * sizeof(u64);
7850                 if (pos != 0) {
7851                         dd_dev_err(dd, "read_cntrs does not support indexing");
7852                         return 0;
7853                 }
7854
7855                 /* Get the start of the block of counters */
7856                 *cntrp = dd->cntrs;
7857
7858                 /*
7859                  * Now go and fill in each counter in the block.
7860                  */
7861                 for (i = 0; i < DEV_CNTR_LAST; i++) {
7862                         entry = &dev_cntrs[i];
7863                         hfi1_cdbg(CNTR, "reading %s", entry->name);
7864                         if (entry->flags & CNTR_DISABLED) {
7865                                 /* Nothing */
7866                                 hfi1_cdbg(CNTR, "\tDisabled\n");
7867                         } else {
7868                                 if (entry->flags & CNTR_VL) {
7869                                         hfi1_cdbg(CNTR, "\tPer VL\n");
7870                                         for (j = 0; j < C_VL_COUNT; j++) {
7871                                                 val = entry->rw_cntr(entry,
7872                                                                   dd, j,
7873                                                                   CNTR_MODE_R,
7874                                                                   0);
7875                                                 hfi1_cdbg(
7876                                                    CNTR,
7877                                                    "\t\tRead 0x%llx for %d\n",
7878                                                    val, j);
7879                                                 dd->cntrs[entry->offset + j] =
7880                                                                             val;
7881                                         }
7882                                 } else {
7883                                         val = entry->rw_cntr(entry, dd,
7884                                                         CNTR_INVALID_VL,
7885                                                         CNTR_MODE_R, 0);
7886                                         dd->cntrs[entry->offset] = val;
7887                                         hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7888                                 }
7889                         }
7890                 }
7891         }
7892         return ret;
7893 }
7894
7895 /*
7896  * Used by sysfs to create files for hfi stats to read
7897  */
7898 u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
7899                         char **namep, u64 **cntrp)
7900 {
7901         int ret;
7902         u64 val = 0;
7903
7904         if (namep) {
7905                 ret = dd->portcntrnameslen;
7906                 if (pos != 0) {
7907                         dd_dev_err(dd, "index not supported");
7908                         return 0;
7909                 }
7910                 *namep = dd->portcntrnames;
7911         } else {
7912                 const struct cntr_entry *entry;
7913                 struct hfi1_pportdata *ppd;
7914                 int i, j;
7915
7916                 ret = (dd->nportcntrs) * sizeof(u64);
7917                 if (pos != 0) {
7918                         dd_dev_err(dd, "indexing not supported");
7919                         return 0;
7920                 }
7921                 ppd = (struct hfi1_pportdata *)(dd + 1 + port);
7922                 *cntrp = ppd->cntrs;
7923
7924                 for (i = 0; i < PORT_CNTR_LAST; i++) {
7925                         entry = &port_cntrs[i];
7926                         hfi1_cdbg(CNTR, "reading %s", entry->name);
7927                         if (entry->flags & CNTR_DISABLED) {
7928                                 /* Nothing */
7929                                 hfi1_cdbg(CNTR, "\tDisabled\n");
7930                                 continue;
7931                         }
7932
7933                         if (entry->flags & CNTR_VL) {
7934                                 hfi1_cdbg(CNTR, "\tPer VL");
7935                                 for (j = 0; j < C_VL_COUNT; j++) {
7936                                         val = entry->rw_cntr(entry, ppd, j,
7937                                                                CNTR_MODE_R,
7938                                                                0);
7939                                         hfi1_cdbg(
7940                                            CNTR,
7941                                            "\t\tRead 0x%llx for %d",
7942                                            val, j);
7943                                         ppd->cntrs[entry->offset + j] = val;
7944                                 }
7945                         } else {
7946                                 val = entry->rw_cntr(entry, ppd,
7947                                                        CNTR_INVALID_VL,
7948                                                        CNTR_MODE_R,
7949                                                        0);
7950                                 ppd->cntrs[entry->offset] = val;
7951                                 hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7952                         }
7953                 }
7954         }
7955         return ret;
7956 }
7957
7958 static void free_cntrs(struct hfi1_devdata *dd)
7959 {
7960         struct hfi1_pportdata *ppd;
7961         int i;
7962
7963         if (dd->synth_stats_timer.data)
7964                 del_timer_sync(&dd->synth_stats_timer);
7965         dd->synth_stats_timer.data = 0;
7966         ppd = (struct hfi1_pportdata *)(dd + 1);
7967         for (i = 0; i < dd->num_pports; i++, ppd++) {
7968                 kfree(ppd->cntrs);
7969                 kfree(ppd->scntrs);
7970                 free_percpu(ppd->ibport_data.rc_acks);
7971                 free_percpu(ppd->ibport_data.rc_qacks);
7972                 free_percpu(ppd->ibport_data.rc_delayed_comp);
7973                 ppd->cntrs = NULL;
7974                 ppd->scntrs = NULL;
7975                 ppd->ibport_data.rc_acks = NULL;
7976                 ppd->ibport_data.rc_qacks = NULL;
7977                 ppd->ibport_data.rc_delayed_comp = NULL;
7978         }
7979         kfree(dd->portcntrnames);
7980         dd->portcntrnames = NULL;
7981         kfree(dd->cntrs);
7982         dd->cntrs = NULL;
7983         kfree(dd->scntrs);
7984         dd->scntrs = NULL;
7985         kfree(dd->cntrnames);
7986         dd->cntrnames = NULL;
7987 }
7988
7989 #define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
7990 #define CNTR_32BIT_MAX 0x00000000FFFFFFFF
7991
7992 static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
7993                               u64 *psval, void *context, int vl)
7994 {
7995         u64 val;
7996         u64 sval = *psval;
7997
7998         if (entry->flags & CNTR_DISABLED) {
7999                 dd_dev_err(dd, "Counter %s not enabled", entry->name);
8000                 return 0;
8001         }
8002
8003         hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8004
8005         val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0);
8006
8007         /* If it's a synthetic counter there is more work we need to do */
8008         if (entry->flags & CNTR_SYNTH) {
8009                 if (sval == CNTR_MAX) {
8010                         /* No need to read already saturated */
8011                         return CNTR_MAX;
8012                 }
8013
8014                 if (entry->flags & CNTR_32BIT) {
8015                         /* 32bit counters can wrap multiple times */
8016                         u64 upper = sval >> 32;
8017                         u64 lower = (sval << 32) >> 32;
8018
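                        /*
                         * Example: if the shadow is 0x1FFFFFFF0 (upper 1,
                         * lower 0xFFFFFFF0) and the hardware now reads 0x10,
                         * lower > val means the hardware wrapped, so upper
                         * becomes 2 and the new shadow is 0x200000010.
                         */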
8019                         if (lower > val) { /* hw wrapped */
8020                                 if (upper == CNTR_32BIT_MAX)
8021                                         val = CNTR_MAX;
8022                                 else
8023                                         upper++;
8024                         }
8025
8026                         if (val != CNTR_MAX)
8027                                 val = (upper << 32) | val;
8028
8029                 } else {
8030                         /* If we rolled we are saturated */
8031                         if ((val < sval) || (val > CNTR_MAX))
8032                                 val = CNTR_MAX;
8033                 }
8034         }
8035
8036         *psval = val;
8037
8038         hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8039
8040         return val;
8041 }
8042
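/*
 * Write one device or port counter and update its software shadow.  For
 * 32-bit synthetic counters only the low 32 bits are written to the
 * hardware, while the full 64-bit value is kept in the shadow and
 * returned to the caller.
 */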
8043 static u64 write_dev_port_cntr(struct hfi1_devdata *dd,
8044                                struct cntr_entry *entry,
8045                                u64 *psval, void *context, int vl, u64 data)
8046 {
8047         u64 val;
8048
8049         if (entry->flags & CNTR_DISABLED) {
8050                 dd_dev_err(dd, "Counter %s not enabled", entry->name);
8051                 return 0;
8052         }
8053
8054         hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8055
8056         if (entry->flags & CNTR_SYNTH) {
8057                 *psval = data;
8058                 if (entry->flags & CNTR_32BIT) {
8059                         val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8060                                              (data << 32) >> 32);
8061                         val = data; /* return the full 64bit value */
8062                 } else {
8063                         val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8064                                              data);
8065                 }
8066         } else {
8067                 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data);
8068         }
8069
8070         *psval = val;
8071
8072         hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8073
8074         return val;
8075 }
8076
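/*
 * Per-device and per-port counter accessors.  The software shadow for a
 * counter lives at entry->offset in the relevant scntrs array; per-VL
 * counters add the VL index to that offset.
 */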
8077 u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl)
8078 {
8079         struct cntr_entry *entry;
8080         u64 *sval;
8081
8082         entry = &dev_cntrs[index];
8083         sval = dd->scntrs + entry->offset;
8084
8085         if (vl != CNTR_INVALID_VL)
8086                 sval += vl;
8087
8088         return read_dev_port_cntr(dd, entry, sval, dd, vl);
8089 }
8090
8091 u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data)
8092 {
8093         struct cntr_entry *entry;
8094         u64 *sval;
8095
8096         entry = &dev_cntrs[index];
8097         sval = dd->scntrs + entry->offset;
8098
8099         if (vl != CNTR_INVALID_VL)
8100                 sval += vl;
8101
8102         return write_dev_port_cntr(dd, entry, sval, dd, vl, data);
8103 }
8104
8105 u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl)
8106 {
8107         struct cntr_entry *entry;
8108         u64 *sval;
8109
8110         entry = &port_cntrs[index];
8111         sval = ppd->scntrs + entry->offset;
8112
8113         if (vl != CNTR_INVALID_VL)
8114                 sval += vl;
8115
8116         if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8117             (index <= C_RCV_HDR_OVF_LAST)) {
8118                 /* We do not want to bother for disabled contexts */
8119                 return 0;
8120         }
8121
8122         return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl);
8123 }
8124
8125 u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
8126 {
8127         struct cntr_entry *entry;
8128         u64 *sval;
8129
8130         entry = &port_cntrs[index];
8131         sval = ppd->scntrs + entry->offset;
8132
8133         if (vl != CNTR_INVALID_VL)
8134                 sval += vl;
8135
8136         if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8137             (index <= C_RCV_HDR_OVF_LAST)) {
8138                 /* We do not want to bother for disabled contexts */
8139                 return 0;
8140         }
8141
8142         return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
8143 }
8144
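/*
 * Timer callback that decides whether the synthetic (software-widened)
 * counters need to be refreshed.  Two cheap "tripwire" reads of the
 * transmit and receive flit counters detect either a rollover or 32
 * bits' worth of traffic since the last run; only then are all device
 * and port counters re-read.
 */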
8145 static void update_synth_timer(unsigned long opaque)
8146 {
8147         u64 cur_tx;
8148         u64 cur_rx;
8149         u64 total_flits;
8150         u8 update = 0;
8151         int i, j, vl;
8152         struct hfi1_pportdata *ppd;
8153         struct cntr_entry *entry;
8154
8155         struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
8156
8157         /*
8158          * Rather than keep beating on the CSRs, pick a minimal set that we can
8159          * check to watch for potential rollover. We can do this by looking at
8160          * the number of flits sent/received. If the total exceeds 32 bits then
8161          * we have to iterate all the counters and update.
8162          */
8163         entry = &dev_cntrs[C_DC_RCV_FLITS];
8164         cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8165
8166         entry = &dev_cntrs[C_DC_XMIT_FLITS];
8167         cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8168
8169         hfi1_cdbg(
8170             CNTR,
8171             "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
8172             dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
8173
8174         if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
8175                 /*
8176                  * May not be strictly necessary to update but it won't hurt and
8177                  * simplifies the logic here.
8178                  */
8179                 update = 1;
8180                 hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
8181                           dd->unit);
8182         } else {
8183                 total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
8184                 hfi1_cdbg(CNTR,
8185                           "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
8186                           total_flits, (u64)CNTR_32BIT_MAX);
8187                 if (total_flits >= CNTR_32BIT_MAX) {
8188                         hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
8189                                   dd->unit);
8190                         update = 1;
8191                 }
8192         }
8193
8194         if (update) {
8195                 hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
8196                 for (i = 0; i < DEV_CNTR_LAST; i++) {
8197                         entry = &dev_cntrs[i];
8198                         if (entry->flags & CNTR_VL) {
8199                                 for (vl = 0; vl < C_VL_COUNT; vl++)
8200                                         read_dev_cntr(dd, i, vl);
8201                         } else {
8202                                 read_dev_cntr(dd, i, CNTR_INVALID_VL);
8203                         }
8204                 }
8205                 ppd = (struct hfi1_pportdata *)(dd + 1);
8206                 for (i = 0; i < dd->num_pports; i++, ppd++) {
8207                         for (j = 0; j < PORT_CNTR_LAST; j++) {
8208                                 entry = &port_cntrs[j];
8209                                 if (entry->flags & CNTR_VL) {
8210                                         for (vl = 0; vl < C_VL_COUNT; vl++)
8211                                                 read_port_cntr(ppd, j, vl);
8212                                 } else {
8213                                         read_port_cntr(ppd, j, CNTR_INVALID_VL);
8214                                 }
8215                         }
8216                 }
8217
8218                 /*
8219                  * We want the value in the register. The goal is to keep track
8220                  * of the number of "ticks" not the counter value. In other
8221                  * words if the register rolls we want to notice it and go ahead
8222                  * and force an update.
8223                  */
8224                 entry = &dev_cntrs[C_DC_XMIT_FLITS];
8225                 dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8226                                                 CNTR_MODE_R, 0);
8227
8228                 entry = &dev_cntrs[C_DC_RCV_FLITS];
8229                 dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8230                                                 CNTR_MODE_R, 0);
8231
8232                 hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
8233                           dd->unit, dd->last_tx, dd->last_rx);
8234
8235         } else {
8236                 hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
8237         }
8238
8239         mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8240 }
8241
8242 #define C_MAX_NAME 13 /* 12 chars + one for \0 */
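/*
 * Allocate and name the device and port counters and start the synthetic
 * counter timer.  Counters flagged CNTR_VL expand into one entry per VL,
 * with the VL number appended to the counter name.
 */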
8243 static int init_cntrs(struct hfi1_devdata *dd)
8244 {
8245         int i, rcv_ctxts, index, j;
8246         size_t sz;
8247         char *p;
8248         char name[C_MAX_NAME];
8249         struct hfi1_pportdata *ppd;
8250
8251         /* set up the stats timer; the add_timer is done at the end */
8252         setup_timer(&dd->synth_stats_timer, update_synth_timer,
8253                     (unsigned long)dd);
8254
8255         /***********************/
8256         /* per device counters */
8257         /***********************/
8258
8259         /* size names and determine how many we have */
8260         dd->ndevcntrs = 0;
8261         sz = 0;
8262         index = 0;
8263
8264         for (i = 0; i < DEV_CNTR_LAST; i++) {
8265                 hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
8266                 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8267                         hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
8268                         continue;
8269                 }
8270
8271                 if (dev_cntrs[i].flags & CNTR_VL) {
8272                         hfi1_dbg_early("\tProcessing VL cntr\n");
8273                         dev_cntrs[i].offset = index;
8274                         for (j = 0; j < C_VL_COUNT; j++) {
8275                                 memset(name, '\0', C_MAX_NAME);
8276                                 snprintf(name, C_MAX_NAME, "%s%d",
8277                                         dev_cntrs[i].name,
8278                                         vl_from_idx(j));
8279                                 sz += strlen(name);
8280                                 sz++;
8281                                 hfi1_dbg_early("\t\t%s\n", name);
8282                                 dd->ndevcntrs++;
8283                                 index++;
8284                         }
8285                 } else {
8286                         /* +1 for newline  */
8287                         sz += strlen(dev_cntrs[i].name) + 1;
8288                         dd->ndevcntrs++;
8289                         dev_cntrs[i].offset = index;
8290                         index++;
8291                         hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
8292                 }
8293         }
8294
8295         /* allocate space for the counter values */
8296         dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8297         if (!dd->cntrs)
8298                 goto bail;
8299
8300         dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8301         if (!dd->scntrs)
8302                 goto bail;
8303
8304
8305         /* allocate space for the counter names */
8306         dd->cntrnameslen = sz;
8307         dd->cntrnames = kmalloc(sz, GFP_KERNEL);
8308         if (!dd->cntrnames)
8309                 goto bail;
8310
8311         /* fill in the names */
8312         for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
8313                 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8314                         /* Nothing */
8315                 } else {
8316                         if (dev_cntrs[i].flags & CNTR_VL) {
8317                                 for (j = 0; j < C_VL_COUNT; j++) {
8318                                         memset(name, '\0', C_MAX_NAME);
8319                                         snprintf(name, C_MAX_NAME, "%s%d",
8320                                                 dev_cntrs[i].name,
8321                                                 vl_from_idx(j));
8322                                         memcpy(p, name, strlen(name));
8323                                         p += strlen(name);
8324                                         *p++ = '\n';
8325                                 }
8326                         } else {
8327                                 memcpy(p, dev_cntrs[i].name,
8328                                        strlen(dev_cntrs[i].name));
8329                                 p += strlen(dev_cntrs[i].name);
8330                                 *p++ = '\n';
8331                         }
8332                         index++;
8333                 }
8334         }
8335
8336         /*********************/
8337         /* per port counters */
8338         /*********************/
8339
8340         /*
8341          * Go through the counters for the overflows and disable the ones we
8342          * don't need. This varies based on platform so we need to do it
8343          * dynamically here.
8344          */
8345         rcv_ctxts = dd->num_rcv_contexts;
8346         for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
8347              i <= C_RCV_HDR_OVF_LAST; i++) {
8348                 port_cntrs[i].flags |= CNTR_DISABLED;
8349         }
8350
8351         /* size port counter names and determine how many we have */
8352         sz = 0;
8353         dd->nportcntrs = 0;
8354         for (i = 0; i < PORT_CNTR_LAST; i++) {
8355                 hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
8356                 if (port_cntrs[i].flags & CNTR_DISABLED) {
8357                         hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
8358                         continue;
8359                 }
8360
8361                 if (port_cntrs[i].flags & CNTR_VL) {
8362                         hfi1_dbg_early("\tProcessing VL cntr\n");
8363                         port_cntrs[i].offset = dd->nportcntrs;
8364                         for (j = 0; j < C_VL_COUNT; j++) {
8365                                 memset(name, '\0', C_MAX_NAME);
8366                                 snprintf(name, C_MAX_NAME, "%s%d",
8367                                         port_cntrs[i].name,
8368                                         vl_from_idx(j));
8369                                 sz += strlen(name);
8370                                 sz++;
8371                                 hfi1_dbg_early("\t\t%s\n", name);
8372                                 dd->nportcntrs++;
8373                         }
8374                 } else {
8375                         /* +1 for newline  */
8376                         sz += strlen(port_cntrs[i].name) + 1;
8377                         port_cntrs[i].offset = dd->nportcntrs;
8378                         dd->nportcntrs++;
8379                         hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
8380                 }
8381         }
8382
8383         /* allocate space for the counter names */
8384         dd->portcntrnameslen = sz;
8385         dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
8386         if (!dd->portcntrnames)
8387                 goto bail;
8388
8389         /* fill in port cntr names */
8390         for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
8391                 if (port_cntrs[i].flags & CNTR_DISABLED)
8392                         continue;
8393
8394                 if (port_cntrs[i].flags & CNTR_VL) {
8395                         for (j = 0; j < C_VL_COUNT; j++) {
8396                                 memset(name, '\0', C_MAX_NAME);
8397                                 snprintf(name, C_MAX_NAME, "%s%d",
8398                                         port_cntrs[i].name,
8399                                         vl_from_idx(j));
8400                                 memcpy(p, name, strlen(name));
8401                                 p += strlen(name);
8402                                 *p++ = '\n';
8403                         }
8404                 } else {
8405                         memcpy(p, port_cntrs[i].name,
8406                                strlen(port_cntrs[i].name));
8407                         p += strlen(port_cntrs[i].name);
8408                         *p++ = '\n';
8409                 }
8410         }
8411
8412         /* allocate per port storage for counter values */
8413         ppd = (struct hfi1_pportdata *)(dd + 1);
8414         for (i = 0; i < dd->num_pports; i++, ppd++) {
8415                 ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8416                 if (!ppd->cntrs)
8417                         goto bail;
8418
8419                 ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8420                 if (!ppd->scntrs)
8421                         goto bail;
8422         }
8423
8424         /* CPU counters need to be allocated and zeroed */
8425         if (init_cpu_counters(dd))
8426                 goto bail;
8427
8428         mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8429         return 0;
8430 bail:
8431         free_cntrs(dd);
8432         return -ENOMEM;
8433 }
8434
8435
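/* Map a chip logical link state onto the corresponding IB port state. */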
8436 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
8437 {
8438         switch (chip_lstate) {
8439         default:
8440                 dd_dev_err(dd,
8441                          "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
8442                          chip_lstate);
8443                 /* fall through */
8444         case LSTATE_DOWN:
8445                 return IB_PORT_DOWN;
8446         case LSTATE_INIT:
8447                 return IB_PORT_INIT;
8448         case LSTATE_ARMED:
8449                 return IB_PORT_ARMED;
8450         case LSTATE_ACTIVE:
8451                 return IB_PORT_ACTIVE;
8452         }
8453 }
8454
8455 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
8456 {
8457         /* look at the HFI meta-states only */
8458         switch (chip_pstate & 0xf0) {
8459         default:
8460                 dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
8461                         chip_pstate);
8462                 /* fall through */
8463         case PLS_DISABLED:
8464                 return IB_PORTPHYSSTATE_DISABLED;
8465         case PLS_OFFLINE:
8466                 return OPA_PORTPHYSSTATE_OFFLINE;
8467         case PLS_POLLING:
8468                 return IB_PORTPHYSSTATE_POLLING;
8469         case PLS_CONFIGPHY:
8470                 return IB_PORTPHYSSTATE_TRAINING;
8471         case PLS_LINKUP:
8472                 return IB_PORTPHYSSTATE_LINKUP;
8473         case PLS_PHYTEST:
8474                 return IB_PORTPHYSSTATE_PHY_TEST;
8475         }
8476 }
8477
8478 /* return the OPA port logical state name */
8479 const char *opa_lstate_name(u32 lstate)
8480 {
8481         static const char * const port_logical_names[] = {
8482                 "PORT_NOP",
8483                 "PORT_DOWN",
8484                 "PORT_INIT",
8485                 "PORT_ARMED",
8486                 "PORT_ACTIVE",
8487                 "PORT_ACTIVE_DEFER",
8488         };
8489         if (lstate < ARRAY_SIZE(port_logical_names))
8490                 return port_logical_names[lstate];
8491         return "unknown";
8492 }
8493
8494 /* return the OPA port physical state name */
8495 const char *opa_pstate_name(u32 pstate)
8496 {
8497         static const char * const port_physical_names[] = {
8498                 "PHYS_NOP",
8499                 "reserved1",
8500                 "PHYS_POLL",
8501                 "PHYS_DISABLED",
8502                 "PHYS_TRAINING",
8503                 "PHYS_LINKUP",
8504                 "PHYS_LINK_ERR_RECOVER",
8505                 "PHYS_PHY_TEST",
8506                 "reserved8",
8507                 "PHYS_OFFLINE",
8508                 "PHYS_GANGED",
8509                 "PHYS_TEST",
8510         };
8511         if (pstate < ARRAY_SIZE(port_physical_names))
8512                 return port_physical_names[pstate];
8513         return "unknown";
8514 }
8515
8516 /*
8517  * Read the hardware link state and set the driver's cached value of it.
8518  * Return the (new) current value.
8519  */
8520 u32 get_logical_state(struct hfi1_pportdata *ppd)
8521 {
8522         u32 new_state;
8523
8524         new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
8525         if (new_state != ppd->lstate) {
8526                 dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
8527                         opa_lstate_name(new_state), new_state);
8528                 ppd->lstate = new_state;
8529         }
8530         /*
8531          * Set port status flags in the page mapped into userspace
8532          * memory. Do it here to ensure a reliable state - this is
8533          * the only function called by all state handling code.
8534          * Always set the flags due to the fact that the cache value
8535          * might have been changed explicitly outside of this
8536          * function.
8537          */
8538         if (ppd->statusp) {
8539                 switch (ppd->lstate) {
8540                 case IB_PORT_DOWN:
8541                 case IB_PORT_INIT:
8542                         *ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
8543                                            HFI1_STATUS_IB_READY);
8544                         break;
8545                 case IB_PORT_ARMED:
8546                         *ppd->statusp |= HFI1_STATUS_IB_CONF;
8547                         break;
8548                 case IB_PORT_ACTIVE:
8549                         *ppd->statusp |= HFI1_STATUS_IB_READY;
8550                         break;
8551                 }
8552         }
8553         return ppd->lstate;
8554 }
8555
8556 /**
8557  * wait_logical_linkstate - wait for an IB link state change to occur
8558  * @ppd: port device
8559  * @state: the state to wait for
8560  * @msecs: the number of milliseconds to wait
8561  *
8562  * Wait up to msecs milliseconds for IB link state change to occur.
8563  * For now, take the easy polling route.
8564  * Returns 0 if state reached, otherwise -ETIMEDOUT.
8565  */
8566 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
8567                                   int msecs)
8568 {
8569         unsigned long timeout;
8570
8571         timeout = jiffies + msecs_to_jiffies(msecs);
8572         while (1) {
8573                 if (get_logical_state(ppd) == state)
8574                         return 0;
8575                 if (time_after(jiffies, timeout))
8576                         break;
8577                 msleep(20);
8578         }
8579         dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
8580
8581         return -ETIMEDOUT;
8582 }
8583
8584 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
8585 {
8586         static u32 remembered_state = 0xff;
8587         u32 pstate;
8588         u32 ib_pstate;
8589
8590         pstate = read_physical_state(ppd->dd);
8591         ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
8592         if (remembered_state != ib_pstate) {
8593                 dd_dev_info(ppd->dd,
8594                         "%s: physical state changed to %s (0x%x), phy 0x%x\n",
8595                         __func__, opa_pstate_name(ib_pstate), ib_pstate,
8596                         pstate);
8597                 remembered_state = ib_pstate;
8598         }
8599         return ib_pstate;
8600 }
8601
8602 /*
8603  * Read/modify/write ASIC_QSFP register bits as selected by mask
8604  * data: 0 or 1 in the positions depending on what needs to be written
8605  * dir: 0 for read, 1 for write
8606  * mask: select by setting
8607  *      I2CCLK  (bit 0)
8608  *      I2CDATA (bit 1)
8609  */
8610 u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
8611                   u32 mask)
8612 {
8613         u64 qsfp_oe, target_oe;
8614
8615         target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
8616         if (mask) {
8617                 /* We are writing register bits, so lock access */
8618                 dir &= mask;
8619                 data &= mask;
8620
8621                 qsfp_oe = read_csr(dd, target_oe);
8622                 qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
8623                 write_csr(dd, target_oe, qsfp_oe);
8624         }
8625         /* We are exclusively reading bits here, but it is unlikely
8626          * we'll get valid data when we set the direction of the pin
8627          * in the same call, so a reader should call this function
8628          * again to get valid data
8629          */
8630         return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
8631 }
8632
8633 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
8634 (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8635
8636 #define SET_STATIC_RATE_CONTROL_SMASK(r) \
8637 (r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8638
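/*
 * Apply the static rate control capability to a send context: if the
 * capability is set for the context type (user vs. kernel), clear the
 * "disallow PBC static rate control" bit, otherwise set it.
 */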
8639 int hfi1_init_ctxt(struct send_context *sc)
8640 {
8641         if (sc != NULL) {
8642                 struct hfi1_devdata *dd = sc->dd;
8643                 u64 reg;
8644                 u8 set = (sc->type == SC_USER ?
8645                           HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
8646                           HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
8647                 reg = read_kctxt_csr(dd, sc->hw_context,
8648                                      SEND_CTXT_CHECK_ENABLE);
8649                 if (set)
8650                         CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
8651                 else
8652                         SET_STATIC_RATE_CONTROL_SMASK(reg);
8653                 write_kctxt_csr(dd, sc->hw_context,
8654                                 SEND_CTXT_CHECK_ENABLE, reg);
8655         }
8656         return 0;
8657 }
8658
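/*
 * Read the on-die thermal sensor: current temperature, the low/high/
 * critical limits, and which triggers have fired.  Only real (RTL)
 * silicon has the sensor; other platforms return -EINVAL.
 */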
8659 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
8660 {
8661         int ret = 0;
8662         u64 reg;
8663
8664         if (dd->icode != ICODE_RTL_SILICON) {
8665                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
8666                         dd_dev_info(dd, "%s: tempsense not supported by HW\n",
8667                                     __func__);
8668                 return -EINVAL;
8669         }
8670         reg = read_csr(dd, ASIC_STS_THERM);
8671         temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
8672                       ASIC_STS_THERM_CURR_TEMP_MASK);
8673         temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
8674                         ASIC_STS_THERM_LO_TEMP_MASK);
8675         temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
8676                         ASIC_STS_THERM_HI_TEMP_MASK);
8677         temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
8678                           ASIC_STS_THERM_CRIT_TEMP_MASK);
8679         /* triggers is a 3-bit value - 1 bit per trigger. */
8680         temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);
8681
8682         return ret;
8683 }
8684
8685 /* ========================================================================= */
8686
8687 /*
8688  * Enable/disable chip from delivering interrupts.
8689  */
8690 void set_intr_state(struct hfi1_devdata *dd, u32 enable)
8691 {
8692         int i;
8693
8694         /*
8695          * In HFI, the mask needs to be 1 to allow interrupts.
8696          */
8697         if (enable) {
8698                 u64 cce_int_mask;
8699                 const int qsfp1_int_smask = QSFP1_INT % 64;
8700                 const int qsfp2_int_smask = QSFP2_INT % 64;
8701
8702                 /* enable all interrupts */
8703                 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8704                         write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
8705
8706                 /*
8707                  * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
8708                  * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
8709                  * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
8710                  * the index of the appropriate CSR in the CCEIntMask CSR array
8711                  */
8712                 cce_int_mask = read_csr(dd, CCE_INT_MASK +
8713                                                 (8*(QSFP1_INT/64)));
8714                 if (dd->hfi1_id) {
8715                         cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
8716                         write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
8717                                         cce_int_mask);
8718                 } else {
8719                         cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
8720                         write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
8721                                         cce_int_mask);
8722                 }
8723         } else {
8724                 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8725                         write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
8726         }
8727 }
8728
8729 /*
8730  * Clear all interrupt sources on the chip.
8731  */
8732 static void clear_all_interrupts(struct hfi1_devdata *dd)
8733 {
8734         int i;
8735
8736         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8737                 write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
8738
8739         write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
8740         write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
8741         write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
8742         write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
8743         write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
8744         write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
8745         write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
8746         for (i = 0; i < dd->chip_send_contexts; i++)
8747                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
8748         for (i = 0; i < dd->chip_sdma_engines; i++)
8749                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
8750
8751         write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
8752         write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
8753         write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
8754 }
8755
8756 /* Move to pcie.c? */
8757 static void disable_intx(struct pci_dev *pdev)
8758 {
8759         pci_intx(pdev, 0);
8760 }
8761
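/*
 * Undo set_up_interrupts(): free any requested IRQs and their affinity
 * hints, disable MSI-X or INTx, and release the MSI-X bookkeeping.
 */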
8762 static void clean_up_interrupts(struct hfi1_devdata *dd)
8763 {
8764         int i;
8765
8766         /* remove irqs - must happen before disabling/turning off */
8767         if (dd->num_msix_entries) {
8768                 /* MSI-X */
8769                 struct hfi1_msix_entry *me = dd->msix_entries;
8770
8771                 for (i = 0; i < dd->num_msix_entries; i++, me++) {
8772                         if (me->arg == NULL) /* => no irq, no affinity */
8773                                 break;
8774                         irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
8775                                         NULL);
8776                         free_irq(me->msix.vector, me->arg);
8777                 }
8778         } else {
8779                 /* INTx */
8780                 if (dd->requested_intx_irq) {
8781                         free_irq(dd->pcidev->irq, dd);
8782                         dd->requested_intx_irq = 0;
8783                 }
8784         }
8785
8786         /* turn off interrupts */
8787         if (dd->num_msix_entries) {
8788                 /* MSI-X */
8789                 hfi1_nomsix(dd);
8790         } else {
8791                 /* INTx */
8792                 disable_intx(dd->pcidev);
8793         }
8794
8795         /* clean structures */
8796         for (i = 0; i < dd->num_msix_entries; i++)
8797                 free_cpumask_var(dd->msix_entries[i].mask);
8798         kfree(dd->msix_entries);
8799         dd->msix_entries = NULL;
8800         dd->num_msix_entries = 0;
8801 }
8802
8803 /*
8804  * Remap the interrupt source from the general handler to the given MSI-X
8805  * interrupt.
8806  */
8807 static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
8808 {
8809         u64 reg;
8810         int m, n;
8811
8812         /* clear from the handled mask of the general interrupt */
8813         m = isrc / 64;
8814         n = isrc % 64;
8815         dd->gi_mask[m] &= ~((u64)1 << n);
8816
8817         /* direct the chip source to the given MSI-X interrupt */
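        /*
         * Each 64-bit CCE_INT_MAP CSR holds eight one-byte map entries;
         * byte n of CSR m selects the MSI-X vector for chip interrupt
         * source 8 * m + n.
         */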
8818         m = isrc / 8;
8819         n = isrc % 8;
8820         reg = read_csr(dd, CCE_INT_MAP + (8*m));
8821         reg &= ~((u64)0xff << (8*n));
8822         reg |= ((u64)msix_intr & 0xff) << (8*n);
8823         write_csr(dd, CCE_INT_MAP + (8*m), reg);
8824 }
8825
8826 static void remap_sdma_interrupts(struct hfi1_devdata *dd,
8827                                   int engine, int msix_intr)
8828 {
8829         /*
8830          * SDMA engine interrupt sources are grouped by type rather
8831          * than by engine.  Per-engine interrupts are as follows:
8832          *      SDMA
8833          *      SDMAProgress
8834          *      SDMAIdle
8835          */
8836         remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
8837                 msix_intr);
8838         remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
8839                 msix_intr);
8840         remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
8841                 msix_intr);
8842 }
8843
8844 static void remap_receive_available_interrupt(struct hfi1_devdata *dd,
8845                                               int rx, int msix_intr)
8846 {
8847         remap_intr(dd, IS_RCVAVAIL_START + rx, msix_intr);
8848 }
8849
8850 static int request_intx_irq(struct hfi1_devdata *dd)
8851 {
8852         int ret;
8853
8854         snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME"_%d",
8855                 dd->unit);
8856         ret = request_irq(dd->pcidev->irq, general_interrupt,
8857                                   IRQF_SHARED, dd->intx_name, dd);
8858         if (ret)
8859                 dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
8860                                 ret);
8861         else
8862                 dd->requested_intx_irq = 1;
8863         return ret;
8864 }
8865
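/*
 * Request one MSI-X vector per interrupt source: vector 0 for the
 * general "slow path" handler, then one per SDMA engine, then one per
 * kernel receive context, and spread them across the CPUs near the
 * device.
 */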
8866 static int request_msix_irqs(struct hfi1_devdata *dd)
8867 {
8868         const struct cpumask *local_mask;
8869         cpumask_var_t def, rcv;
8870         bool def_ret, rcv_ret;
8871         int first_general, last_general;
8872         int first_sdma, last_sdma;
8873         int first_rx, last_rx;
8874         int first_cpu, restart_cpu, curr_cpu;
8875         int rcv_cpu, sdma_cpu;
8876         int i, ret = 0, possible;
8877         int ht;
8878
8879         /* calculate the ranges we are going to use */
8880         first_general = 0;
8881         first_sdma = last_general = first_general + 1;
8882         first_rx = last_sdma = first_sdma + dd->num_sdma;
8883         last_rx = first_rx + dd->n_krcv_queues;
8884
8885         /*
8886          * Interrupt affinity.
8887          *
8888          * Non-rcv avail interrupts get a default mask that starts as
8889          * the possible CPUs with hyperthread siblings removed and
8890          * each CPU taken for a rcv avail interrupt removed.
8891          *
8892          * Rcv avail interrupts get node-relative CPU 1 and up,
8893          * wrapping back to node-relative CPU 1 as necessary.
8894          *
8895          */
8896         local_mask = cpumask_of_pcibus(dd->pcidev->bus);
8897         /* if first cpu is invalid, use NUMA 0 */
8898         if (cpumask_first(local_mask) >= nr_cpu_ids)
8899                 local_mask = topology_core_cpumask(0);
8900
8901         def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
8902         rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
8903         if (!def_ret || !rcv_ret)
8904                 goto bail;
8905         /* use local mask as default */
8906         cpumask_copy(def, local_mask);
8907         possible = cpumask_weight(def);
8908         /* remove the hyperthread siblings, keeping one CPU per core */
8909         ht = cpumask_weight(
8910                         topology_sibling_cpumask(cpumask_first(local_mask)));
8911         for (i = possible/ht; i < possible; i++)
8912                 cpumask_clear_cpu(i, def);
8913         /* reset possible */
8914         possible = cpumask_weight(def);
8915         /* def now has full cores on chosen node */
8916         first_cpu = cpumask_first(def);
8917         if (nr_cpu_ids >= first_cpu)
8918                 first_cpu++;
8919         restart_cpu = first_cpu;
8920         curr_cpu = restart_cpu;
8921
8922         for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) {
8923                 cpumask_clear_cpu(curr_cpu, def);
8924                 cpumask_set_cpu(curr_cpu, rcv);
8925                 if (curr_cpu >= possible)
8926                         curr_cpu = restart_cpu;
8927                 else
8928                         curr_cpu++;
8929         }
8930         /* def now holds the non-rcv CPUs; rcv holds the rcv avail CPUs */
8931         rcv_cpu = cpumask_first(rcv);
8932         sdma_cpu = cpumask_first(def);
8933
8934         /*
8935          * Sanity check - the code expects all SDMA chip source
8936          * interrupts to be in the same CSR, starting at bit 0.  Verify
8937          * that this is true by checking the bit location of the start.
8938          */
8939         BUILD_BUG_ON(IS_SDMA_START % 64);
8940
8941         for (i = 0; i < dd->num_msix_entries; i++) {
8942                 struct hfi1_msix_entry *me = &dd->msix_entries[i];
8943                 const char *err_info;
8944                 irq_handler_t handler;
8945                 irq_handler_t thread = NULL;
8946                 void *arg;
8947                 int idx;
8948                 struct hfi1_ctxtdata *rcd = NULL;
8949                 struct sdma_engine *sde = NULL;
8950
8951                 /* obtain the arguments to request_irq */
8952                 if (first_general <= i && i < last_general) {
8953                         idx = i - first_general;
8954                         handler = general_interrupt;
8955                         arg = dd;
8956                         snprintf(me->name, sizeof(me->name),
8957                                 DRIVER_NAME"_%d", dd->unit);
8958                         err_info = "general";
8959                 } else if (first_sdma <= i && i < last_sdma) {
8960                         idx = i - first_sdma;
8961                         sde = &dd->per_sdma[idx];
8962                         handler = sdma_interrupt;
8963                         arg = sde;
8964                         snprintf(me->name, sizeof(me->name),
8965                                 DRIVER_NAME"_%d sdma%d", dd->unit, idx);
8966                         err_info = "sdma";
8967                         remap_sdma_interrupts(dd, idx, i);
8968                 } else if (first_rx <= i && i < last_rx) {
8969                         idx = i - first_rx;
8970                         rcd = dd->rcd[idx];
8971                         /* no interrupt if no rcd */
8972                         if (!rcd)
8973                                 continue;
8974                         /*
8975                          * Set the interrupt register and mask for this
8976                          * context's interrupt.
8977                          */
8978                         rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
8979                         rcd->imask = ((u64)1) <<
8980                                         ((IS_RCVAVAIL_START+idx) % 64);
8981                         handler = receive_context_interrupt;
8982                         thread = receive_context_thread;
8983                         arg = rcd;
8984                         snprintf(me->name, sizeof(me->name),
8985                                 DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
8986                         err_info = "receive context";
8987                         remap_receive_available_interrupt(dd, idx, i);
8988                 } else {
8989                         /* not in our expected range - complain, then
8990                          * ignore it */
8991                         dd_dev_err(dd,
8992                                 "Unexpected extra MSI-X interrupt %d\n", i);
8993                         continue;
8994                 }
8995                 /* no argument, no interrupt */
8996                 if (arg == NULL)
8997                         continue;
8998                 /* make sure the name is terminated */
8999                 me->name[sizeof(me->name)-1] = 0;
9000
9001                 ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
9002                                                 me->name, arg);
9003                 if (ret) {
9004                         dd_dev_err(dd,
9005                                 "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
9006                                  err_info, me->msix.vector, idx, ret);
9007                         return ret;
9008                 }
9009                 /*
9010                  * assign arg after request_irq call, so it will be
9011                  * cleaned up
9012                  */
9013                 me->arg = arg;
9014
9015                 if (!zalloc_cpumask_var(
9016                         &dd->msix_entries[i].mask,
9017                         GFP_KERNEL))
9018                         goto bail;
9019                 if (handler == sdma_interrupt) {
9020                         dd_dev_info(dd, "sdma engine %d cpu %d\n",
9021                                 sde->this_idx, sdma_cpu);
9022                         cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
9023                         sdma_cpu = cpumask_next(sdma_cpu, def);
9024                         if (sdma_cpu >= nr_cpu_ids)
9025                                 sdma_cpu = cpumask_first(def);
9026                 } else if (handler == receive_context_interrupt) {
9027                         dd_dev_info(dd, "rcv ctxt %d cpu %d\n",
9028                                 rcd->ctxt, rcv_cpu);
9029                         cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask);
9030                         rcv_cpu = cpumask_next(rcv_cpu, rcv);
9031                         if (rcv_cpu >= nr_cpu_ids)
9032                                 rcv_cpu = cpumask_first(rcv);
9033                 } else {
9034                         /* otherwise first def */
9035                         dd_dev_info(dd, "%s cpu %d\n",
9036                                 err_info, cpumask_first(def));
9037                         cpumask_set_cpu(
9038                                 cpumask_first(def), dd->msix_entries[i].mask);
9039                 }
9040                 irq_set_affinity_hint(
9041                         dd->msix_entries[i].msix.vector,
9042                         dd->msix_entries[i].mask);
9043         }
9044
9045 out:
9046         free_cpumask_var(def);
9047         free_cpumask_var(rcv);
9048         return ret;
9049 bail:
9050         ret = -ENOMEM;
9051         goto  out;
9052 }
9053
9054 /*
9055  * Set the general handler to accept all interrupts, remap all
9056  * chip interrupts back to MSI-X 0.
9057  */
9058 static void reset_interrupts(struct hfi1_devdata *dd)
9059 {
9060         int i;
9061
9062         /* all interrupts handled by the general handler */
9063         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9064                 dd->gi_mask[i] = ~(u64)0;
9065
9066         /* all chip interrupts map to MSI-X 0 */
9067         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9068                 write_csr(dd, CCE_INT_MAP + (8*i), 0);
9069 }
9070
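/*
 * Allocate and wire up the chip interrupts: try MSI-X with one vector
 * per source and fall back to a single shared INTx interrupt if MSI-X
 * is not available.
 */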
9071 static int set_up_interrupts(struct hfi1_devdata *dd)
9072 {
9073         struct hfi1_msix_entry *entries;
9074         u32 total, request;
9075         int i, ret;
9076         int single_interrupt = 0; /* we expect to have all the interrupts */
9077
9078         /*
9079          * Interrupt count:
9080          *      1 general, "slow path" interrupt (includes the SDMA engines
9081          *              slow source, SDMACleanupDone)
9082          *      N interrupts - one per used SDMA engine
9083          *      M interrupts - one per kernel receive context
9084          */
9085         total = 1 + dd->num_sdma + dd->n_krcv_queues;
9086
9087         entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
9088         if (!entries) {
9089                 ret = -ENOMEM;
9090                 goto fail;
9091         }
9092         /* 1-1 MSI-X entry assignment */
9093         for (i = 0; i < total; i++)
9094                 entries[i].msix.entry = i;
9095
9096         /* ask for MSI-X interrupts */
9097         request = total;
9098         request_msix(dd, &request, entries);
9099
9100         if (request == 0) {
9101                 /* using INTx */
9102                 /* dd->num_msix_entries already zero */
9103                 kfree(entries);
9104                 single_interrupt = 1;
9105                 dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
9106         } else {
9107                 /* using MSI-X */
9108                 dd->num_msix_entries = request;
9109                 dd->msix_entries = entries;
9110
9111                 if (request != total) {
9112                         /* using MSI-X, with reduced interrupts */
9113                         dd_dev_err(
9114                                 dd,
9115                                 "cannot handle reduced interrupt case, want %u, got %u\n",
9116                                 total, request);
9117                         ret = -EINVAL;
9118                         goto fail;
9119                 }
9120                 dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
9121         }
9122
9123         /* mask all interrupts */
9124         set_intr_state(dd, 0);
9125         /* clear all pending interrupts */
9126         clear_all_interrupts(dd);
9127
9128         /* reset general handler mask, chip MSI-X mappings */
9129         reset_interrupts(dd);
9130
9131         if (single_interrupt)
9132                 ret = request_intx_irq(dd);
9133         else
9134                 ret = request_msix_irqs(dd);
9135         if (ret)
9136                 goto fail;
9137
9138         return 0;
9139
9140 fail:
9141         clean_up_interrupts(dd);
9142         return ret;
9143 }
9144
9145 /*
9146  * Set up context values in dd.  Sets:
9147  *
9148  *      num_rcv_contexts - number of contexts being used
9149  *      n_krcv_queues - number of kernel contexts
9150  *      first_user_ctxt - first non-kernel context in array of contexts
9151  *      freectxts  - number of free user contexts
9152  *      num_send_contexts - number of PIO send contexts being used
9153  */
9154 static int set_up_context_variables(struct hfi1_devdata *dd)
9155 {
9156         int num_kernel_contexts;
9157         int num_user_contexts;
9158         int total_contexts;
9159         int ret;
9160         unsigned ngroups;
9161
9162         /*
9163          * Kernel contexts: (to be fixed later):
9164          * - min of 2 or 1 context/numa
9165          * - Context 0 - default/errors
9166          * - Context 1 - VL15
9167          */
9168         if (n_krcvqs)
9169                 num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
9170         else
9171                 num_kernel_contexts = num_online_nodes();
9172         num_kernel_contexts =
9173                 max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
9174         /*
9175          * Every kernel receive context needs an ACK send context.
9176          * One send context is allocated for each VL{0-7} and VL15.
9177          */
9178         if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
9179                 dd_dev_err(dd,
9180                            "Reducing # kernel rcv contexts to: %d, from %d\n",
9181                            (int)(dd->chip_send_contexts - num_vls - 1),
9182                            (int)num_kernel_contexts);
9183                 num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
9184         }
9185         /*
9186          * User contexts: (to be fixed later)
9187          *      - set to num_rcv_contexts if non-zero
9188          *      - default to 1 user context per CPU
9189          */
9190         if (num_rcv_contexts)
9191                 num_user_contexts = num_rcv_contexts;
9192         else
9193                 num_user_contexts = num_online_cpus();
9194
9195         total_contexts = num_kernel_contexts + num_user_contexts;
9196
9197         /*
9198          * Adjust the counts given a global max.
9199          */
9200         if (total_contexts > dd->chip_rcv_contexts) {
9201                 dd_dev_err(dd,
9202                            "Reducing # user receive contexts to: %d, from %d\n",
9203                            (int)(dd->chip_rcv_contexts - num_kernel_contexts),
9204                            (int)num_user_contexts);
9205                 num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
9206                 /* recalculate */
9207                 total_contexts = num_kernel_contexts + num_user_contexts;
9208         }
9209
9210         /* the first N are kernel contexts, the rest are user contexts */
9211         dd->num_rcv_contexts = total_contexts;
9212         dd->n_krcv_queues = num_kernel_contexts;
9213         dd->first_user_ctxt = num_kernel_contexts;
9214         dd->freectxts = num_user_contexts;
9215         dd_dev_info(dd,
9216                 "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
9217                 (int)dd->chip_rcv_contexts,
9218                 (int)dd->num_rcv_contexts,
9219                 (int)dd->n_krcv_queues,
9220                 (int)dd->num_rcv_contexts - dd->n_krcv_queues);
9221
9222         /*
9223          * Receive array allocation:
9224          *   All RcvArray entries are divided into groups of 8. This
9225          *   is required by the hardware and will speed up writes to
9226          *   consecutive entries by using write-combining of the entire
9227          *   cacheline.
9228          *
9229          *   The groups are divided evenly among all contexts; any
9230          *   leftover groups are given to the first N user contexts.
9231          *
9232          */
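        /*
         * For example, 4096 groups spread over 160 contexts gives 25
         * groups per context, with 96 groups (nctxt_extra) left over
         * for the first user contexts.
         */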
9233         dd->rcv_entries.group_size = RCV_INCREMENT;
9234         ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
9235         dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
9236         dd->rcv_entries.nctxt_extra = ngroups -
9237                 (dd->num_rcv_contexts * dd->rcv_entries.ngroups);
9238         dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
9239                     dd->rcv_entries.ngroups,
9240                     dd->rcv_entries.nctxt_extra);
9241         if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
9242             MAX_EAGER_ENTRIES * 2) {
9243                 dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
9244                         dd->rcv_entries.group_size;
9245                 dd_dev_info(dd,
9246                    "RcvArray group count too high, change to %u\n",
9247                    dd->rcv_entries.ngroups);
9248                 dd->rcv_entries.nctxt_extra = 0;
9249         }
9250         /*
9251          * PIO send contexts
9252          */
9253         ret = init_sc_pools_and_sizes(dd);
9254         if (ret >= 0) { /* success */
9255                 dd->num_send_contexts = ret;
9256                 dd_dev_info(
9257                         dd,
9258                         "send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
9259                         dd->chip_send_contexts,
9260                         dd->num_send_contexts,
9261                         dd->sc_sizes[SC_KERNEL].count,
9262                         dd->sc_sizes[SC_ACK].count,
9263                         dd->sc_sizes[SC_USER].count);
9264                 ret = 0;        /* success */
9265         }
9266
9267         return ret;
9268 }
9269
9270 /*
9271  * Set the device/port partition key table. The MAD code
9272  * will ensure that, at least, the partial management
9273  * partition key is present in the table.
9274  */
9275 static void set_partition_keys(struct hfi1_pportdata *ppd)
9276 {
9277         struct hfi1_devdata *dd = ppd->dd;
9278         u64 reg = 0;
9279         int i;
9280
9281         dd_dev_info(dd, "Setting partition keys\n");
9282         for (i = 0; i < hfi1_get_npkeys(dd); i++) {
9283                 reg |= (ppd->pkeys[i] &
9284                         RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
9285                         ((i % 4) *
9286                          RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
9287                 /* Each register holds 4 PKey values. */
9288                 if ((i % 4) == 3) {
9289                         write_csr(dd, RCV_PARTITION_KEY +
9290                                   ((i - 3) * 2), reg);
9291                         reg = 0;
9292                 }
9293         }
9294
9295         /* Always enable HW pkeys check when pkeys table is set */
9296         add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
9297 }
9298
9299 /*
9300  * These CSRs and memories are uninitialized on reset and must be
9301  * written before reading to set the ECC/parity bits.
9302  *
9303  * NOTE: All user context CSRs that are not mmapped write-only
9304  * (e.g. the TID flows) must be initialized even if the driver never
9305  * reads them.
9306  */
9307 static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
9308 {
9309         int i, j;
9310
9311         /* CceIntMap */
9312         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9313                 write_csr(dd, CCE_INT_MAP+(8*i), 0);
9314
9315         /* SendCtxtCreditReturnAddr */
9316         for (i = 0; i < dd->chip_send_contexts; i++)
9317                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9318
9319         /* PIO Send buffers */
9320         /* SDMA Send buffers */
9321         /* These are not normally read, and (presently) have no method
9322          * to be read, so are not pre-initialized */
9323
9324         /* RcvHdrAddr */
9325         /* RcvHdrTailAddr */
9326         /* RcvTidFlowTable */
9327         for (i = 0; i < dd->chip_rcv_contexts; i++) {
9328                 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9329                 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9330                 for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
9331                         write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
9332         }
9333
9334         /* RcvArray */
9335         for (i = 0; i < dd->chip_rcv_array_count; i++)
9336                 write_csr(dd, RCV_ARRAY + (8*i),
9337                                         RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
9338
9339         /* RcvQPMapTable */
9340         for (i = 0; i < 32; i++)
9341                 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9342 }
9343
9344 /*
9345  * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus.
9346  */
9347 static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
9348                              u64 ctrl_bits)
9349 {
9350         unsigned long timeout;
9351         u64 reg;
9352
9353         /* is the condition present? */
9354         reg = read_csr(dd, CCE_STATUS);
9355         if ((reg & status_bits) == 0)
9356                 return;
9357
9358         /* clear the condition */
9359         write_csr(dd, CCE_CTRL, ctrl_bits);
9360
9361         /* wait for the condition to clear */
9362         timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT);
9363         while (1) {
9364                 reg = read_csr(dd, CCE_STATUS);
9365                 if ((reg & status_bits) == 0)
9366                         return;
9367                 if (time_after(jiffies, timeout)) {
9368                         dd_dev_err(dd,
9369                                 "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
9370                                 status_bits, reg & status_bits);
9371                         return;
9372                 }
9373                 udelay(1);
9374         }
9375 }
9376
9377 /* set CCE CSRs to chip reset defaults */
9378 static void reset_cce_csrs(struct hfi1_devdata *dd)
9379 {
9380         int i;
9381
9382         /* CCE_REVISION read-only */
9383         /* CCE_REVISION2 read-only */
9384         /* CCE_CTRL - bits clear automatically */
9385         /* CCE_STATUS read-only, use CceCtrl to clear */
9386         clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK);
9387         clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK);
9388         clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK);
9389         for (i = 0; i < CCE_NUM_SCRATCH; i++)
9390                 write_csr(dd, CCE_SCRATCH + (8 * i), 0);
9391         /* CCE_ERR_STATUS read-only */
9392         write_csr(dd, CCE_ERR_MASK, 0);
9393         write_csr(dd, CCE_ERR_CLEAR, ~0ull);
9394         /* CCE_ERR_FORCE leave alone */
9395         for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++)
9396                 write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0);
9397         write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR);
9398         /* CCE_PCIE_CTRL leave alone */
9399         for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
9400                 write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
9401                 write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
9402                                         CCE_MSIX_TABLE_UPPER_RESETCSR);
9403         }
9404         for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
9405                 /* CCE_MSIX_PBA read-only */
9406                 write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull);
9407                 write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull);
9408         }
9409         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9410                 write_csr(dd, CCE_INT_MAP + (8 * i), 0);
9411         for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
9412                 /* CCE_INT_STATUS read-only */
9413                 write_csr(dd, CCE_INT_MASK + (8 * i), 0);
9414                 write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull);
9415                 /* CCE_INT_FORCE leave alone */
9416                 /* CCE_INT_BLOCKED read-only */
9417         }
9418         for (i = 0; i < CCE_NUM_32_BIT_INT_COUNTERS; i++)
9419                 write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
9420 }
9421
9422 /* set ASIC CSRs to chip reset defaults */
9423 static void reset_asic_csrs(struct hfi1_devdata *dd)
9424 {
9425         int i;
9426
9427         /*
9428          * If the HFIs are shared between separate nodes or VMs,
9429          * then more will need to be done here.  One idea is a module
9430          * parameter that returns early, letting the first power-on or
9431          * a known first load do the reset and blocking all others.
9432          */
9433
9434         if (!(dd->flags & HFI1_DO_INIT_ASIC))
9435                 return;
9436
9437         if (dd->icode != ICODE_FPGA_EMULATION) {
9438                 /* emulation does not have an SBus - leave these alone */
9439                 /*
9440                  * All writes to ASIC_CFG_SBUS_REQUEST do something.
9441                  * Notes:
9442                  * o The reset is not zero if aimed at the core.  See the
9443                  *   SBus documentation for details.
9444                  * o If the SBus firmware has been updated (e.g. by the BIOS),
9445                  *   will the reset revert that?
9446                  */
9447                 /* ASIC_CFG_SBUS_REQUEST leave alone */
9448                 write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
9449         }
9450         /* ASIC_SBUS_RESULT read-only */
9451         write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
9452         for (i = 0; i < ASIC_NUM_SCRATCH; i++)
9453                 write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
9454         write_csr(dd, ASIC_CFG_MUTEX, 0);       /* this will clear it */
9455
9456         /* We might want to retain this state across FLR if we ever use it */
9457         write_csr(dd, ASIC_CFG_DRV_STR, 0);
9458
9459         write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0);
9460         /* ASIC_STS_THERM read-only */
9461         /* ASIC_CFG_RESET leave alone */
9462
9463         write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
9464         /* ASIC_PCIE_SD_HOST_STATUS read-only */
9465         write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
9466         write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
9467         /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
9468         write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
9469         /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
9470         /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
9471         for (i = 0; i < 16; i++)
9472                 write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
9473
9474         /* ASIC_GPIO_IN read-only */
9475         write_csr(dd, ASIC_GPIO_OE, 0);
9476         write_csr(dd, ASIC_GPIO_INVERT, 0);
9477         write_csr(dd, ASIC_GPIO_OUT, 0);
9478         write_csr(dd, ASIC_GPIO_MASK, 0);
9479         /* ASIC_GPIO_STATUS read-only */
9480         write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
9481         /* ASIC_GPIO_FORCE leave alone */
9482
9483         /* ASIC_QSFP1_IN read-only */
9484         write_csr(dd, ASIC_QSFP1_OE, 0);
9485         write_csr(dd, ASIC_QSFP1_INVERT, 0);
9486         write_csr(dd, ASIC_QSFP1_OUT, 0);
9487         write_csr(dd, ASIC_QSFP1_MASK, 0);
9488         /* ASIC_QSFP1_STATUS read-only */
9489         write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
9490         /* ASIC_QSFP1_FORCE leave alone */
9491
9492         /* ASIC_QSFP2_IN read-only */
9493         write_csr(dd, ASIC_QSFP2_OE, 0);
9494         write_csr(dd, ASIC_QSFP2_INVERT, 0);
9495         write_csr(dd, ASIC_QSFP2_OUT, 0);
9496         write_csr(dd, ASIC_QSFP2_MASK, 0);
9497         /* ASIC_QSFP2_STATUS read-only */
9498         write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
9499         /* ASIC_QSFP2_FORCE leave alone */
9500
9501         write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
9502         /* this also writes a NOP command, clearing paging mode */
9503         write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
9504         write_csr(dd, ASIC_EEP_DATA, 0);
9505 }
9506
9507 /* set MISC CSRs to chip reset defaults */
9508 static void reset_misc_csrs(struct hfi1_devdata *dd)
9509 {
9510         int i;
9511
9512         for (i = 0; i < 32; i++) {
9513                 write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
9514                 write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
9515                 write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
9516         }
9517         /* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
9518            only be written in 128-byte chunks */
9519         /* init RSA engine to clear lingering errors */
9520         write_csr(dd, MISC_CFG_RSA_CMD, 1);
9521         write_csr(dd, MISC_CFG_RSA_MU, 0);
9522         write_csr(dd, MISC_CFG_FW_CTRL, 0);
9523         /* MISC_STS_8051_DIGEST read-only */
9524         /* MISC_STS_SBM_DIGEST read-only */
9525         /* MISC_STS_PCIE_DIGEST read-only */
9526         /* MISC_STS_FAB_DIGEST read-only */
9527         /* MISC_ERR_STATUS read-only */
9528         write_csr(dd, MISC_ERR_MASK, 0);
9529         write_csr(dd, MISC_ERR_CLEAR, ~0ull);
9530         /* MISC_ERR_FORCE leave alone */
9531 }
9532
9533 /* set TXE CSRs to chip reset defaults */
9534 static void reset_txe_csrs(struct hfi1_devdata *dd)
9535 {
9536         int i;
9537
9538         /*
9539          * TXE Kernel CSRs
9540          */
9541         write_csr(dd, SEND_CTRL, 0);
9542         __cm_reset(dd, 0);      /* reset CM internal state */
9543         /* SEND_CONTEXTS read-only */
9544         /* SEND_DMA_ENGINES read-only */
9545         /* SEND_PIO_MEM_SIZE read-only */
9546         /* SEND_DMA_MEM_SIZE read-only */
9547         write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
9548         pio_reset_all(dd);      /* SEND_PIO_INIT_CTXT */
9549         /* SEND_PIO_ERR_STATUS read-only */
9550         write_csr(dd, SEND_PIO_ERR_MASK, 0);
9551         write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
9552         /* SEND_PIO_ERR_FORCE leave alone */
9553         /* SEND_DMA_ERR_STATUS read-only */
9554         write_csr(dd, SEND_DMA_ERR_MASK, 0);
9555         write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
9556         /* SEND_DMA_ERR_FORCE leave alone */
9557         /* SEND_EGRESS_ERR_STATUS read-only */
9558         write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
9559         write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
9560         /* SEND_EGRESS_ERR_FORCE leave alone */
9561         write_csr(dd, SEND_BTH_QP, 0);
9562         write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
9563         write_csr(dd, SEND_SC2VLT0, 0);
9564         write_csr(dd, SEND_SC2VLT1, 0);
9565         write_csr(dd, SEND_SC2VLT2, 0);
9566         write_csr(dd, SEND_SC2VLT3, 0);
9567         write_csr(dd, SEND_LEN_CHECK0, 0);
9568         write_csr(dd, SEND_LEN_CHECK1, 0);
9569         /* SEND_ERR_STATUS read-only */
9570         write_csr(dd, SEND_ERR_MASK, 0);
9571         write_csr(dd, SEND_ERR_CLEAR, ~0ull);
9572         /* SEND_ERR_FORCE read-only */
9573         for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
9574                 write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
9575         for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
9576                 write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
9577         for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
9578                 write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
9579         for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
9580                 write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
9581         for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
9582                 write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
9583         write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
9584         write_csr(dd, SEND_CM_GLOBAL_CREDIT,
9585                                         SEND_CM_GLOBAL_CREDIT_RESETCSR);
9586         /* SEND_CM_CREDIT_USED_STATUS read-only */
9587         write_csr(dd, SEND_CM_TIMER_CTRL, 0);
9588         write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
9589         write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
9590         write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
9591         write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
9592         for (i = 0; i < TXE_NUM_DATA_VL; i++)
9593                 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
9594         write_csr(dd, SEND_CM_CREDIT_VL15, 0);
9595         /* SEND_CM_CREDIT_USED_VL read-only */
9596         /* SEND_CM_CREDIT_USED_VL15 read-only */
9597         /* SEND_EGRESS_CTXT_STATUS read-only */
9598         /* SEND_EGRESS_SEND_DMA_STATUS read-only */
9599         write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
9600         /* SEND_EGRESS_ERR_INFO read-only */
9601         /* SEND_EGRESS_ERR_SOURCE read-only */
9602
9603         /*
9604          * TXE Per-Context CSRs
9605          */
9606         for (i = 0; i < dd->chip_send_contexts; i++) {
9607                 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9608                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
9609                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9610                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
9611                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
9612                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
9613                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
9614                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
9615                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
9616                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
9617                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
9618                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
9619         }
9620
9621         /*
9622          * TXE Per-SDMA CSRs
9623          */
9624         for (i = 0; i < dd->chip_sdma_engines; i++) {
9625                 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9626                 /* SEND_DMA_STATUS read-only */
9627                 write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
9628                 write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
9629                 write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
9630                 /* SEND_DMA_HEAD read-only */
9631                 write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
9632                 write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
9633                 /* SEND_DMA_IDLE_CNT read-only */
9634                 write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
9635                 write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
9636                 /* SEND_DMA_DESC_FETCHED_CNT read-only */
9637                 /* SEND_DMA_ENG_ERR_STATUS read-only */
9638                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
9639                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
9640                 /* SEND_DMA_ENG_ERR_FORCE leave alone */
9641                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
9642                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
9643                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
9644                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
9645                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
9646                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
9647                 write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
9648         }
9649 }
9650
9651 /*
9652  * Expect on entry:
9653  * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
9654  */
9655 static void init_rbufs(struct hfi1_devdata *dd)
9656 {
9657         u64 reg;
9658         int count;
9659
9660         /*
9661          * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
9662          * clear.
9663          */
9664         count = 0;
9665         while (1) {
9666                 reg = read_csr(dd, RCV_STATUS);
9667                 if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
9668                             | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
9669                         break;
9670                 /*
9671                  * Give up after 1ms - maximum wait time.
9672                  *
9673                  * RBuf size is 148KiB.  Slowest possible is PCIe Gen1 x1 at
9674                  * 250MB/s bandwidth.  Lower rate to 66% for overhead to get:
9675                  *      148 KB / (66% * 250MB/s) = 920us
9676                  */
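                /*
                 * With udelay(2) per iteration, 500 iterations is about 1ms,
                 * comfortably above the ~920us worst case computed above.
                 */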
9677                 if (count++ > 500) {
9678                         dd_dev_err(dd,
9679                                 "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
9680                                 __func__, reg);
9681                         break;
9682                 }
9683                 udelay(2); /* do not busy-wait the CSR */
9684         }
9685
9686         /* start the init - expect RcvCtrl to be 0 */
9687         write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);
9688
9689         /*
9690          * Read to force the write of RcvCtrl.RxRbufInit.  There is a brief
9691          * period after the write before RcvStatus.RxRbufInitDone is valid.
9692          * The delay in the first run through the loop below is sufficient and
9693          * required before the first read of RcvStatus.RxRbufInitDone.
9694          */
9695         read_csr(dd, RCV_CTRL);
9696
9697         /* wait for the init to finish */
9698         count = 0;
9699         while (1) {
9700                 /* delay is required first time through - see above */
9701                 udelay(2); /* do not busy-wait the CSR */
9702                 reg = read_csr(dd, RCV_STATUS);
9703                 if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
9704                         break;
9705
9706                 /* give up after 100us - slowest possible at 33MHz is 73us */
9707                 if (count++ > 50) {
9708                         dd_dev_err(dd,
9709                                 "%s: RcvStatus.RxRbufInitDone not set, continuing\n",
9710                                 __func__);
9711                         break;
9712                 }
9713         }
9714 }
9715
9716 /* set RXE CSRs to chip reset defaults */
9717 static void reset_rxe_csrs(struct hfi1_devdata *dd)
9718 {
9719         int i, j;
9720
9721         /*
9722          * RXE Kernel CSRs
9723          */
9724         write_csr(dd, RCV_CTRL, 0);
9725         init_rbufs(dd);
9726         /* RCV_STATUS read-only */
9727         /* RCV_CONTEXTS read-only */
9728         /* RCV_ARRAY_CNT read-only */
9729         /* RCV_BUF_SIZE read-only */
9730         write_csr(dd, RCV_BTH_QP, 0);
9731         write_csr(dd, RCV_MULTICAST, 0);
9732         write_csr(dd, RCV_BYPASS, 0);
9733         write_csr(dd, RCV_VL15, 0);
9734         /* this is a clear-down */
9735         write_csr(dd, RCV_ERR_INFO,
9736                         RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
9737         /* RCV_ERR_STATUS read-only */
9738         write_csr(dd, RCV_ERR_MASK, 0);
9739         write_csr(dd, RCV_ERR_CLEAR, ~0ull);
9740         /* RCV_ERR_FORCE leave alone */
9741         for (i = 0; i < 32; i++)
9742                 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9743         for (i = 0; i < 4; i++)
9744                 write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
9745         for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
9746                 write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
9747         for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
9748                 write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
9749         for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
9750                 write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
9751                 write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
9752                 write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
9753         }
9754         for (i = 0; i < 32; i++)
9755                 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
9756
9757         /*
9758          * RXE Kernel and User Per-Context CSRs
9759          */
9760         for (i = 0; i < dd->chip_rcv_contexts; i++) {
9761                 /* kernel */
9762                 write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9763                 /* RCV_CTXT_STATUS read-only */
9764                 write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
9765                 write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
9766                 write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
9767                 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9768                 write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
9769                 write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
9770                 write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
9771                 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9772                 write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
9773                 write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);
9774
9775                 /* user */
9776                 /* RCV_HDR_TAIL read-only */
9777                 write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
9778                 /* RCV_EGR_INDEX_TAIL read-only */
9779                 write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
9780                 /* RCV_EGR_OFFSET_TAIL read-only */
9781                 for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
9782                         write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
9783                                 0);
9784                 }
9785         }
9786 }
9787
9788 /*
9789  * Set sc2vl tables.
9790  *
9791  * They power on to zeros, so to avoid send context errors
9792  * they need to be set:
9793  *
9794  * SC 0-7 -> VL 0-7 (respectively)
9795  * SC 15  -> VL 15
9796  * otherwise
9797  *        -> VL 0
9798  */
9799 static void init_sc2vl_tables(struct hfi1_devdata *dd)
9800 {
9801         int i;
9802         /* init per architecture spec, constrained by hardware capability */
9803
9804         /* HFI maps sent packets */
9805         write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
9806                 0,
9807                 0, 0, 1, 1,
9808                 2, 2, 3, 3,
9809                 4, 4, 5, 5,
9810                 6, 6, 7, 7));
9811         write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
9812                 1,
9813                 8, 0, 9, 0,
9814                 10, 0, 11, 0,
9815                 12, 0, 13, 0,
9816                 14, 0, 15, 15));
9817         write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
9818                 2,
9819                 16, 0, 17, 0,
9820                 18, 0, 19, 0,
9821                 20, 0, 21, 0,
9822                 22, 0, 23, 0));
9823         write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
9824                 3,
9825                 24, 0, 25, 0,
9826                 26, 0, 27, 0,
9827                 28, 0, 29, 0,
9828                 30, 0, 31, 0));
9829
9830         /* DC maps received packets */
9831         write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
9832                 15_0,
9833                 0, 0, 1, 1,  2, 2,  3, 3,  4, 4,  5, 5,  6, 6,  7,  7,
9834                 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
9835         write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
9836                 31_16,
9837                 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
9838                 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));
9839
9840         /* initialize the cached sc2vl values consistently with h/w */
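        /*
         * i.e. sc2vl[0..7] = 0..7, sc2vl[15] = 15, all other entries 0,
         * mirroring the SEND_SC2VLT and DCC_CFG_SC_VL programming above.
         */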
9841         for (i = 0; i < 32; i++) {
9842                 if (i < 8 || i == 15)
9843                         *((u8 *)(dd->sc2vl) + i) = (u8)i;
9844                 else
9845                         *((u8 *)(dd->sc2vl) + i) = 0;
9846         }
9847 }
9848
9849 /*
9850  * Read chip sizes and then reset parts to sane, disabled, values.  We cannot
9851  * depend on the chip going through a power-on reset - a driver may be loaded
9852  * and unloaded many times.
9853  *
9854  * Do not write any CSR values to the chip in this routine - there may be
9855  * a reset following the (possible) FLR in this routine.
9856  *
9857  */
9858 static void init_chip(struct hfi1_devdata *dd)
9859 {
9860         int i;
9861
9862         /*
9863          * Put the HFI CSRs in a known state.
9864          * Combine this with a DC reset.
9865          *
9866          * Stop the device from doing anything while we do a
9867          * reset.  We know there are no other active users of
9868          * the device since we are now in charge.  Turn off
9869          * all outbound and inbound traffic and make sure
9870          * the device does not generate any interrupts.
9871          */
9872
9873         /* disable send contexts and SDMA engines */
9874         write_csr(dd, SEND_CTRL, 0);
9875         for (i = 0; i < dd->chip_send_contexts; i++)
9876                 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9877         for (i = 0; i < dd->chip_sdma_engines; i++)
9878                 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9879         /* disable port (turn off RXE inbound traffic) and contexts */
9880         write_csr(dd, RCV_CTRL, 0);
9881         for (i = 0; i < dd->chip_rcv_contexts; i++)
9882                 write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9883         /* mask all interrupt sources */
9884         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9885                 write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
9886
9887         /*
9888          * DC Reset: do a full DC reset before the register clear.
9889          * A recommended length of time to hold is one CSR read,
9890          * so reread the CceDcCtrl.  Then, hold the DC in reset
9891          * across the clear.
9892          */
9893         write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
9894         (void) read_csr(dd, CCE_DC_CTRL);
9895
9896         if (use_flr) {
9897                 /*
9898                  * A FLR will reset the SPC core and part of the PCIe.
9899                  * The parts that need to be restored have already been
9900                  * saved.
9901                  */
9902                 dd_dev_info(dd, "Resetting CSRs with FLR\n");
9903
9904                 /* do the FLR, the DC reset will remain */
9905                 hfi1_pcie_flr(dd);
9906
9907                 /* restore command and BARs */
9908                 restore_pci_variables(dd);
9909
9910                 if (is_a0(dd)) {
9911                         dd_dev_info(dd, "Resetting CSRs with FLR\n");
9912                         hfi1_pcie_flr(dd);
9913                         restore_pci_variables(dd);
9914                 }
9915
9916                 reset_asic_csrs(dd);
9917         } else {
9918                 dd_dev_info(dd, "Resetting CSRs with writes\n");
9919                 reset_cce_csrs(dd);
9920                 reset_txe_csrs(dd);
9921                 reset_rxe_csrs(dd);
9922                 reset_asic_csrs(dd);
9923                 reset_misc_csrs(dd);
9924         }
9925         /* clear the DC reset */
9926         write_csr(dd, CCE_DC_CTRL, 0);
9927
9928         /* Set the LED off */
9929         if (is_a0(dd))
9930                 setextled(dd, 0);
9931         /*
9932          * Clear the QSFP reset.
9933          * A0 leaves the out lines floating on power on, then on an FLR
9934          * enforces a 0 on all out pins.  The driver does not touch
9935          * ASIC_QSFPn_OUT otherwise.  This leaves RESET_N low and
9936          * anything plugged in held constantly in reset, if it pays attention
9937          * to RESET_N.
9938          * A prime example of this is SiPh. For now, set all pins high.
9939          * I2CCLK and I2CDAT will change per direction, and INT_N and
9940          * MODPRS_N are input only and their value is ignored.
9941          */
9942         if (is_a0(dd)) {
9943                 write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
9944                 write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
9945         }
9946 }
9947
9948 static void init_early_variables(struct hfi1_devdata *dd)
9949 {
9950         int i;
9951
9952         /* assign link credit variables */
9953         dd->vau = CM_VAU;
9954         dd->link_credits = CM_GLOBAL_CREDITS;
9955         if (is_a0(dd))
9956                 dd->link_credits--;
9957         dd->vcu = cu_to_vcu(hfi1_cu);
9958         /* enough room for 8 MAD packets plus header - 17K */
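        /* 8 * (2048 + 128) = 17408 bytes, converted to AUs below */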
9959         dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau);
9960         if (dd->vl15_init > dd->link_credits)
9961                 dd->vl15_init = dd->link_credits;
9962
9963         write_uninitialized_csrs_and_memories(dd);
9964
9965         if (HFI1_CAP_IS_KSET(PKEY_CHECK))
9966                 for (i = 0; i < dd->num_pports; i++) {
9967                         struct hfi1_pportdata *ppd = &dd->pport[i];
9968
9969                         set_partition_keys(ppd);
9970                 }
9971         init_sc2vl_tables(dd);
9972 }
9973
9974 static void init_kdeth_qp(struct hfi1_devdata *dd)
9975 {
9976         /* user changed the KDETH_QP */
9977         if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
9978                 /* out of range or illegal value */
9979                 dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
9980                 kdeth_qp = 0;
9981         }
9982         if (kdeth_qp == 0)      /* not set, or failed range check */
9983                 kdeth_qp = DEFAULT_KDETH_QP;
9984
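        /*
         * Program the same prefix into both the send and receive BTH QP
         * registers so KDETH packets are recognized in both directions.
         * (Assumed interpretation: the prefix forms the upper bits of the
         * 24-bit QPN, e.g. the default 0x80 marks QPNs 0x80xxxx as KDETH.)
         */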
9985         write_csr(dd, SEND_BTH_QP,
9986                         (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
9987                                 << SEND_BTH_QP_KDETH_QP_SHIFT);
9988
9989         write_csr(dd, RCV_BTH_QP,
9990                         (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
9991                                 << RCV_BTH_QP_KDETH_QP_SHIFT);
9992 }
9993
9994 /**
9995  * init_qpmap_table
9996  * @dd - device data
9997  * @first_ctxt - first context
9998  * @last_ctxt - last context
9999  *
10000  * This routine sets the qpn mapping table that
10001  * is indexed by qpn[8:1].
10002  *
10003  * The routine will round robin the 256 settings
10004  * from first_ctxt to last_ctxt.
10005  *
10006  * The first/last looks ahead to having specialized
10007  * receive contexts for mgmt and bypass.  Normal
10008  * verbs traffic is assumed to be on a range
10009  * of receive contexts.
10010  */
10011 static void init_qpmap_table(struct hfi1_devdata *dd,
10012                              u32 first_ctxt,
10013                              u32 last_ctxt)
10014 {
10015         u64 reg = 0;
10016         u64 regno = RCV_QP_MAP_TABLE;
10017         int i;
10018         u64 ctxt = first_ctxt;
10019
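        /*
         * Illustrative example: with first_ctxt = 1 and last_ctxt = 3, the
         * 256 entries are filled 1, 2, 3, 1, 2, 3, ..., skipping VL15CTXT
         * whenever it falls in that range.  Eight 8-bit entries are packed
         * into each 64-bit RcvQPMapTable register before it is written.
         */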
10020         for (i = 0; i < 256;) {
10021                 if (ctxt == VL15CTXT) {
10022                         ctxt++;
10023                         if (ctxt > last_ctxt)
10024                                 ctxt = first_ctxt;
10025                         continue;
10026                 }
10027                 reg |= ctxt << (8 * (i % 8));
10028                 i++;
10029                 ctxt++;
10030                 if (ctxt > last_ctxt)
10031                         ctxt = first_ctxt;
10032                 if (i % 8 == 0) {
10033                         write_csr(dd, regno, reg);
10034                         reg = 0;
10035                         regno += 8;
10036                 }
10037         }
10038         if (i % 8)
10039                 write_csr(dd, regno, reg);
10040
10041         add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
10042                         | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
10043 }
10044
10045 /**
10046  * init_qos - init RX qos
10047  * @dd - device data
10048  * @first_ctxt - first receive context to use
10049  *
10050  * This routine initializes Rule 0 and the
10051  * RSM map table to implement qos.
10052  *
10053  * If all of the limit tests succeed,
10054  * qos is applied based on the array
10055  * interpretation of krcvqs where
10056  * entry 0 is VL0.
10057  *
10058  * The number of vl bits (n) and the number of qpn
10059  * bits (m) are computed to feed both the RSM map table
10060  * and the single rule.
10061  *
10062  */
10063 static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
10064 {
10065         u8 max_by_vl = 0;
10066         unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
10067         u64 *rsmmap;
10068         u64 reg;
10069         u8  rxcontext = is_a0(dd) ? 0 : 0xff;  /* 0 is default if a0 ver. */
10070
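        /*
         * Worked example (illustrative): num_vls = 8 with a largest krcvqs[]
         * entry of 4 gives qpns_per_vl = 4, n = 3 VL bits and m = 2 QPN bits,
         * so the map indices (qpn << n) ^ vl computed below span 0..31.
         */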
10071         /* validate */
10072         if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
10073             num_vls == 1 ||
10074             krcvqsset <= 1)
10075                 goto bail;
10076         for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
10077                 if (krcvqs[i] > max_by_vl)
10078                         max_by_vl = krcvqs[i];
10079         if (max_by_vl > 32)
10080                 goto bail;
10081         qpns_per_vl = __roundup_pow_of_two(max_by_vl);
10082         /* determine bits vl */
10083         n = ilog2(num_vls);
10084         /* determine bits for qpn */
10085         m = ilog2(qpns_per_vl);
10086         if ((m + n) > 7)
10087                 goto bail;
10088         if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
10089                 goto bail;
10090         rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
        if (!rsmmap)
                goto bail;
10091         memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
10092         /* init the local copy of the table */
10093         for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
10094                 unsigned tctxt;
10095
10096                 for (qpn = 0, tctxt = ctxt;
10097                      krcvqs[i] && qpn < qpns_per_vl; qpn++) {
10098                         unsigned idx, regoff, regidx;
10099
10100                         /* generate index < 128 */
10101                         idx = (qpn << n) ^ i;
10102                         regoff = (idx % 8) * 8;
10103                         regidx = idx / 8;
10104                         reg = rsmmap[regidx];
10105                         /* replace 0xff with context number */
10106                         reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
10107                                 << regoff);
10108                         reg |= (u64)(tctxt++) << regoff;
10109                         rsmmap[regidx] = reg;
10110                         if (tctxt == ctxt + krcvqs[i])
10111                                 tctxt = ctxt;
10112                 }
10113                 ctxt += krcvqs[i];
10114         }
10115         /* flush cached copies to chip */
10116         for (i = 0; i < NUM_MAP_REGS; i++)
10117                 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
10118         /* add rule0 */
10119         write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
10120                 RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
10121                         << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
10122                 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
10123         write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
10124                 LRH_BTH_MATCH_OFFSET
10125                         << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
10126                 LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
10127                 LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
10128                 ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
10129                 QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
10130                 ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
10131         write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
10132                 LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
10133                 LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
10134                 LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
10135                 LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
10136         /* Enable RSM */
10137         add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
10138         kfree(rsmmap);
10139         /* map everything else (non-VL15) to context 0 */
10140         init_qpmap_table(dd, 0, 0);
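        /*
         * n VL bits plus one, presumably because the QP map table is indexed
         * by qpn[8:1] and QPN bit 0 takes no part in context selection.
         */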
10144         dd->qos_shift = n + 1;
10145         return;
10146 bail:
10147         dd->qos_shift = 1;
10148         init_qpmap_table(
10149                 dd,
10150                 dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0,
10151                 dd->n_krcv_queues - 1);
10152 }
10153
10154 static void init_rxe(struct hfi1_devdata *dd)
10155 {
10156         /* enable all receive errors */
10157         write_csr(dd, RCV_ERR_MASK, ~0ull);
10158         /* setup QPN map table - start where VL15 context leaves off */
10159         init_qos(
10160                 dd,
10161                 dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
10162         /*
10163          * make sure RcvCtrl.RcvWcb <= PCIe Device Control
10164          * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
10165          * space, PciCfgCap2.MaxPayloadSize in HFI).  There is only one
10166          * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and
10167          * Max_PayLoad_Size set to its minimum of 128.
10168          *
10169          * Presently, RcvCtrl.RcvWcb is not modified from its default of 0
10170          * (64 bytes).  Max_Payload_Size is possibly modified upward in
10171          * tune_pcie_caps() which is called after this routine.
10172          */
10173 }
10174
10175 static void init_other(struct hfi1_devdata *dd)
10176 {
10177         /* enable all CCE errors */
10178         write_csr(dd, CCE_ERR_MASK, ~0ull);
10179         /* enable *some* Misc errors */
10180         write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK);
10181         /* enable all DC errors, except LCB */
10182         write_csr(dd, DCC_ERR_FLG_EN, ~0ull);
10183         write_csr(dd, DC_DC8051_ERR_EN, ~0ull);
10184 }
10185
10186 /*
10187  * Fill out the given AU table using the given CU.  A CU is defined in terms
10188  * of AUs.  The table is an encoding: given the index, how many AUs does that
10189  * represent?
10190  *
10191  * NOTE: Assumes that the register layout is the same for the
10192  * local and remote tables.
10193  */
10194 static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
10195                                u32 csr0to3, u32 csr4to7)
10196 {
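        /*
         * Entries encode 0, 1, 2*cu, 4*cu, 8*cu, 16*cu, 32*cu and 64*cu AUs;
         * e.g. cu = 1 yields the table 0, 1, 2, 4, 8, 16, 32, 64.
         */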
10197         write_csr(dd, csr0to3,
10198                    0ull <<
10199                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
10200                 |  1ull <<
10201                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
10202                 |  2ull * cu <<
10203                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
10204                 |  4ull * cu <<
10205                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
10206         write_csr(dd, csr4to7,
10207                    8ull * cu <<
10208                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
10209                 | 16ull * cu <<
10210                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
10211                 | 32ull * cu <<
10212                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
10213                 | 64ull * cu <<
10214                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
10215
10216 }
10217
10218 static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10219 {
10220         assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
10221                                         SEND_CM_LOCAL_AU_TABLE4_TO7);
10222 }
10223
10224 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10225 {
10226         assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
10227                                         SEND_CM_REMOTE_AU_TABLE4_TO7);
10228 }
10229
10230 static void init_txe(struct hfi1_devdata *dd)
10231 {
10232         int i;
10233
10234         /* enable all PIO, SDMA, general, and Egress errors */
10235         write_csr(dd, SEND_PIO_ERR_MASK, ~0ull);
10236         write_csr(dd, SEND_DMA_ERR_MASK, ~0ull);
10237         write_csr(dd, SEND_ERR_MASK, ~0ull);
10238         write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
10239
10240         /* enable all per-context and per-SDMA engine errors */
10241         for (i = 0; i < dd->chip_send_contexts; i++)
10242                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
10243         for (i = 0; i < dd->chip_sdma_engines; i++)
10244                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
10245
10246         /* set the local CU to AU mapping */
10247         assign_local_cm_au_table(dd, dd->vcu);
10248
10249         /*
10250          * Set reasonable default for Credit Return Timer
10251          * Don't set on Simulator - causes it to choke.
10252          */
10253         if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
10254                 write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
10255 }
10256
10257 int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
10258 {
10259         struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10260         unsigned sctxt;
10261         int ret = 0;
10262         u64 reg;
10263
10264         if (!rcd || !rcd->sc) {
10265                 ret = -EINVAL;
10266                 goto done;
10267         }
10268         sctxt = rcd->sc->hw_context;
10269         reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
10270                 ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
10271                  SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
10272         /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
10273         if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
10274                 reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
10275         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
10276         /*
10277          * Enable send-side J_KEY integrity check, unless this is A0 h/w
10278          * (due to A0 erratum).
10279          */
10280         if (!is_a0(dd)) {
10281                 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10282                 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10283                 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10284         }
10285
10286         /* Enable J_KEY check on receive context. */
10287         reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
10288                 ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
10289                  RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
10290         write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
10291 done:
10292         return ret;
10293 }
10294
10295 int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
10296 {
10297         struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10298         unsigned sctxt;
10299         int ret = 0;
10300         u64 reg;
10301
10302         if (!rcd || !rcd->sc) {
10303                 ret = -EINVAL;
10304                 goto done;
10305         }
10306         sctxt = rcd->sc->hw_context;
10307         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
10308         /*
10309          * Disable send-side J_KEY integrity check, unless this is A0 h/w.
10310          * This check would not have been enabled for A0 h/w, see
10311          * set_ctxt_jkey().
10312          */
10313         if (!is_a0(dd)) {
10314                 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10315                 reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10316                 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10317         }
10318         /* Turn off the J_KEY on the receive side */
10319         write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
10320 done:
10321         return ret;
10322 }
10323
10324 int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
10325 {
10326         struct hfi1_ctxtdata *rcd;
10327         unsigned sctxt;
10328         int ret = 0;
10329         u64 reg;
10330
10331         if (ctxt < dd->num_rcv_contexts)
10332                 rcd = dd->rcd[ctxt];
10333         else {
10334                 ret = -EINVAL;
10335                 goto done;
10336         }
10337         if (!rcd || !rcd->sc) {
10338                 ret = -EINVAL;
10339                 goto done;
10340         }
10341         sctxt = rcd->sc->hw_context;
10342         reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
10343                 SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
10344         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
10345         reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10346         reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10347         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10348 done:
10349         return ret;
10350 }
10351
10352 int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
10353 {
10354         struct hfi1_ctxtdata *rcd;
10355         unsigned sctxt;
10356         int ret = 0;
10357         u64 reg;
10358
10359         if (ctxt < dd->num_rcv_contexts)
10360                 rcd = dd->rcd[ctxt];
10361         else {
10362                 ret = -EINVAL;
10363                 goto done;
10364         }
10365         if (!rcd || !rcd->sc) {
10366                 ret = -EINVAL;
10367                 goto done;
10368         }
10369         sctxt = rcd->sc->hw_context;
10370         reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10371         reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10372         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10373         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
10374 done:
10375         return ret;
10376 }
10377
10378 /*
10379  * Start doing the clean up of the chip. Our clean up happens in multiple
10380  * stages and this is just the first.
10381  */
10382 void hfi1_start_cleanup(struct hfi1_devdata *dd)
10383 {
10384         free_cntrs(dd);
10385         free_rcverr(dd);
10386         clean_up_interrupts(dd);
10387 }
10388
10389 #define HFI_BASE_GUID(dev) \
10390         ((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
10391
10392 /*
10393  * Certain chip functions need to be initialized only once per asic
10394  * instead of per-device. This function finds the peer device and
10395  * checks whether that chip initialization needs to be done by this
10396  * device.
10397  */
10398 static void asic_should_init(struct hfi1_devdata *dd)
10399 {
10400         unsigned long flags;
10401         struct hfi1_devdata *tmp, *peer = NULL;
10402
10403         spin_lock_irqsave(&hfi1_devs_lock, flags);
10404         /* Find our peer device */
10405         list_for_each_entry(tmp, &hfi1_dev_list, list) {
10406                 if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
10407                     dd->unit != tmp->unit) {
10408                         peer = tmp;
10409                         break;
10410                 }
10411         }
10412
10413         /*
10414          * "Claim" the ASIC for initialization if it hasn't been
10415          * "claimed" yet.
10416          */
10417         if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
10418                 dd->flags |= HFI1_DO_INIT_ASIC;
10419         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
10420 }
10421
10422 /**
10423  * Allocate and initialize the device structure for the hfi.
10424  * @dev: the pci_dev for hfi1_ib device
10425  * @ent: pci_device_id struct for this dev
10426  *
10427  * Also allocates, initializes, and returns the devdata struct for this
10428  * device instance
10429  *
10430  * This is global, and is called directly at init to set up the
10431  * chip-specific function pointers for later use.
10432  */
10433 struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
10434                                   const struct pci_device_id *ent)
10435 {
10436         struct hfi1_devdata *dd;
10437         struct hfi1_pportdata *ppd;
10438         u64 reg;
10439         int i, ret;
10440         static const char * const inames[] = { /* implementation names */
10441                 "RTL silicon",
10442                 "RTL VCS simulation",
10443                 "RTL FPGA emulation",
10444                 "Functional simulator"
10445         };
10446
10447         dd = hfi1_alloc_devdata(pdev,
10448                 NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
10449         if (IS_ERR(dd))
10450                 goto bail;
10451         ppd = dd->pport;
10452         for (i = 0; i < dd->num_pports; i++, ppd++) {
10453                 int vl;
10454                 /* init common fields */
10455                 hfi1_init_pportdata(pdev, ppd, dd, 0, 1);
10456                 /* DC supports 4 link widths */
10457                 ppd->link_width_supported =
10458                         OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X |
10459                         OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
10460                 ppd->link_width_downgrade_supported =
10461                         ppd->link_width_supported;
10462                 /* start out enabling only 4X */
10463                 ppd->link_width_enabled = OPA_LINK_WIDTH_4X;
10464                 ppd->link_width_downgrade_enabled =
10465                                         ppd->link_width_downgrade_supported;
10466                 /* link width active is 0 when link is down */
10467                 /* link width downgrade active is 0 when link is down */
10468
10469                 if (num_vls < HFI1_MIN_VLS_SUPPORTED
10470                         || num_vls > HFI1_MAX_VLS_SUPPORTED) {
10471                         hfi1_early_err(&pdev->dev,
10472                                        "Invalid num_vls %u, using %u VLs\n",
10473                                     num_vls, HFI1_MAX_VLS_SUPPORTED);
10474                         num_vls = HFI1_MAX_VLS_SUPPORTED;
10475                 }
10476                 ppd->vls_supported = num_vls;
10477                 ppd->vls_operational = ppd->vls_supported;
10478                 /* Set the default MTU. */
10479                 for (vl = 0; vl < num_vls; vl++)
10480                         dd->vld[vl].mtu = hfi1_max_mtu;
10481                 dd->vld[15].mtu = MAX_MAD_PACKET;
10482                 /*
10483                  * Set the initial values to reasonable default, will be set
10484                  * for real when link is up.
10485                  */
10486                 ppd->lstate = IB_PORT_DOWN;
10487                 ppd->overrun_threshold = 0x4;
10488                 ppd->phy_error_threshold = 0xf;
10489                 ppd->port_crc_mode_enabled = link_crc_mask;
10490                 /* initialize supported LTP CRC mode */
10491                 ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
10492                 /* initialize enabled LTP CRC mode */
10493                 ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4;
10494                 /* start in offline */
10495                 ppd->host_link_state = HLS_DN_OFFLINE;
10496                 init_vl_arb_caches(ppd);
10497         }
10498
10499         dd->link_default = HLS_DN_POLL;
10500
10501         /*
10502          * Do remaining PCIe setup and save PCIe values in dd.
10503          * Any error printing is already done by the init code.
10504          * On return, we have the chip mapped.
10505          */
10506         ret = hfi1_pcie_ddinit(dd, pdev, ent);
10507         if (ret < 0)
10508                 goto bail_free;
10509
10510         /* verify that reads actually work, save revision for reset check */
10511         dd->revision = read_csr(dd, CCE_REVISION);
10512         if (dd->revision == ~(u64)0) {
10513                 dd_dev_err(dd, "cannot read chip CSRs\n");
10514                 ret = -EINVAL;
10515                 goto bail_cleanup;
10516         }
10517         dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
10518                         & CCE_REVISION_CHIP_REV_MAJOR_MASK;
10519         dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
10520                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
10521
10522         /* obtain the hardware ID - NOT related to unit, which is a
10523            software enumeration */
10524         reg = read_csr(dd, CCE_REVISION2);
10525         dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
10526                                         & CCE_REVISION2_HFI_ID_MASK;
10527         /* the variable size will remove unwanted bits */
10528         dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
10529         dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
10530         dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
10531                 dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
10532                 (int)dd->irev);
10533
10534         /* speeds the hardware can support */
10535         dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
10536         /* speeds allowed to run at */
10537         dd->pport->link_speed_enabled = dd->pport->link_speed_supported;
10538         /* give a reasonable active value, will be set on link up */
10539         dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
10540
10541         dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
10542         dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
10543         dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
10544         dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
10545         dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
10546         /* fix up link widths for emulation _p */
10547         ppd = dd->pport;
10548         if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
10549                 ppd->link_width_supported =
10550                         ppd->link_width_enabled =
10551                         ppd->link_width_downgrade_supported =
10552                         ppd->link_width_downgrade_enabled =
10553                                 OPA_LINK_WIDTH_1X;
10554         }
10555         /* ensure num_vls isn't larger than the number of sdma engines */
10556         if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
10557                 dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
10558                                 num_vls, HFI1_MAX_VLS_SUPPORTED);
10559                 ppd->vls_supported = num_vls = HFI1_MAX_VLS_SUPPORTED;
10560                 ppd->vls_operational = ppd->vls_supported;
10561         }
10562
10563         /*
10564          * Convert the ns parameter to the 64 * cclocks used in the CSR.
10565          * Limit the max if larger than the field holds.  If timeout is
10566          * non-zero, then the calculated field will be at least 1.
10567          *
10568          * Must be after icode is set up - the cclock rate depends
10569          * on knowing the hardware being used.
10570          */
10571         dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64;
10572         if (dd->rcv_intr_timeout_csr >
10573                         RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK)
10574                 dd->rcv_intr_timeout_csr =
10575                         RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK;
10576         else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout)
10577                 dd->rcv_intr_timeout_csr = 1;
10578
10579         /* needs to be done before we look for the peer device */
10580         read_guid(dd);
10581
10582         /* should this device init the ASIC block? */
10583         asic_should_init(dd);
10584
10585         /* obtain chip sizes, reset chip CSRs */
10586         init_chip(dd);
10587
10588         /* read in the PCIe link speed information */
10589         ret = pcie_speeds(dd);
10590         if (ret)
10591                 goto bail_cleanup;
10592
10593         /* read in firmware */
10594         ret = hfi1_firmware_init(dd);
10595         if (ret)
10596                 goto bail_cleanup;
10597
10598         /*
10599          * In general, the PCIe Gen3 transition must occur after the
10600          * chip has been idled (so it won't initiate any PCIe transactions
10601          * e.g. an interrupt) and before the driver changes any registers
10602          * (the transition will reset the registers).
10603          *
10604          * In particular, place this call after:
10605          * - init_chip()     - the chip will not initiate any PCIe transactions
10606          * - pcie_speeds()   - reads the current link speed
10607          * - hfi1_firmware_init() - the needed firmware is ready to be
10608          *                          downloaded
10609          */
10610         ret = do_pcie_gen3_transition(dd);
10611         if (ret)
10612                 goto bail_cleanup;
10613
10614         /* start setting dd values and adjusting CSRs */
10615         init_early_variables(dd);
10616
10617         parse_platform_config(dd);
10618
10619         /* add board names as they are defined */
10620         dd->boardname = kmalloc(64, GFP_KERNEL);
10621         if (!dd->boardname) {
                ret = -ENOMEM;
10622                 goto bail_cleanup;
        }
10623         snprintf(dd->boardname, 64, "Board ID 0x%llx",
10624                  dd->revision >> CCE_REVISION_BOARD_ID_LOWER_NIBBLE_SHIFT
10625                     & CCE_REVISION_BOARD_ID_LOWER_NIBBLE_MASK);
10626
10627         snprintf(dd->boardversion, BOARD_VERS_MAX,
10628                  "ChipABI %u.%u, %s, ChipRev %u.%u, SW Compat %llu\n",
10629                  HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN,
10630                  dd->boardname,
10631                  (u32)dd->majrev,
10632                  (u32)dd->minrev,
10633                  (dd->revision >> CCE_REVISION_SW_SHIFT)
10634                     & CCE_REVISION_SW_MASK);
10635
10636         ret = set_up_context_variables(dd);
10637         if (ret)
10638                 goto bail_cleanup;
10639
10640         /* set initial RXE CSRs */
10641         init_rxe(dd);
10642         /* set initial TXE CSRs */
10643         init_txe(dd);
10644         /* set initial non-RXE, non-TXE CSRs */
10645         init_other(dd);
10646         /* set up KDETH QP prefix in both RX and TX CSRs */
10647         init_kdeth_qp(dd);
10648
10649         /* send contexts must be set up before receive contexts */
10650         ret = init_send_contexts(dd);
10651         if (ret)
10652                 goto bail_cleanup;
10653
10654         ret = hfi1_create_ctxts(dd);
10655         if (ret)
10656                 goto bail_cleanup;
10657
10658         dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
10659         /*
10660          * rcd[0] is guaranteed to be valid by this point. Also, all
10661          * contexts are using the same value, as per the module parameter.
10662          */
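        /*
         * rhf_offset is in 32-bit words; the sizeof() term subtracts the two
         * dwords occupied by the 64-bit receive header flags (RHF).
         */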
10663         dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
10664
10665         ret = init_pervl_scs(dd);
10666         if (ret)
10667                 goto bail_cleanup;
10668
10669         /* sdma init */
10670         for (i = 0; i < dd->num_pports; ++i) {
10671                 ret = sdma_init(dd, i);
10672                 if (ret)
10673                         goto bail_cleanup;
10674         }
10675
10676         /* use contexts created by hfi1_create_ctxts */
10677         ret = set_up_interrupts(dd);
10678         if (ret)
10679                 goto bail_cleanup;
10680
10681         /* set up LCB access - must be after set_up_interrupts() */
10682         init_lcb_access(dd);
10683
10684         snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
10685                  dd->base_guid & 0xFFFFFF);
10686
10687         dd->oui1 = dd->base_guid >> 56 & 0xFF;
10688         dd->oui2 = dd->base_guid >> 48 & 0xFF;
10689         dd->oui3 = dd->base_guid >> 40 & 0xFF;
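              /*
               * Illustrative only: for a hypothetical base_guid of
               * 0x0011750123456789ULL the shifts above give oui1 = 0x00,
               * oui2 = 0x11 and oui3 = 0x75, i.e. the 24-bit OUI held in the
               * top three bytes of the GUID.
               */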
10690
10691         ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
10692         if (ret)
10693                 goto bail_clear_intr;
10694         check_fabric_firmware_versions(dd);
10695
10696         thermal_init(dd);
10697
10698         ret = init_cntrs(dd);
10699         if (ret)
10700                 goto bail_clear_intr;
10701
10702         ret = init_rcverr(dd);
10703         if (ret)
10704                 goto bail_free_cntrs;
10705
10706         ret = eprom_init(dd);
10707         if (ret)
10708                 goto bail_free_rcverr;
10709
10710         goto bail;
10711
10712 bail_free_rcverr:
10713         free_rcverr(dd);
10714 bail_free_cntrs:
10715         free_cntrs(dd);
10716 bail_clear_intr:
10717         clean_up_interrupts(dd);
10718 bail_cleanup:
10719         hfi1_pcie_ddcleanup(dd);
10720 bail_free:
10721         hfi1_free_devdata(dd);
10722         dd = ERR_PTR(ret);
10723 bail:
10724         return dd;
10725 }
10726
10727 static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
10728                         u32 dw_len)
10729 {
10730         u32 delta_cycles;
10731         u32 current_egress_rate = ppd->current_egress_rate;
10732         /* rates here are in units of 10^6 bits/sec */
10733
10734         if (desired_egress_rate == -1)
10735                 return 0; /* shouldn't happen */
10736
10737         if (desired_egress_rate >= current_egress_rate)
10738                 return 0; /* we can't help it go faster, only slower */
10739
10740         delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
10741                         egress_cycles(dw_len * 4, current_egress_rate);
10742
10743         return (u16)delta_cycles;
10744 }
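
      /*
       * Illustrative only: delay_cycles() returns the extra egress cycles,
       * as computed by egress_cycles(), needed to stretch a packet of
       * dw_len dwords (dw_len * 4 bytes) from the current egress rate down
       * to the requested static rate.  For example, with a current rate of
       * 25000 Mb/s and a desired rate of 12500 Mb/s the packet takes twice
       * as long on the wire, so the result is
       * egress_cycles(dw_len * 4, 12500) - egress_cycles(dw_len * 4, 25000).
       */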
10745
10747 /**
10748  * create_pbc - build a pbc for transmission
       * @ppd: the port the packet will be sent on
10749  * @flags: special case flags or-ed into the built pbc
10750  * @srate_mbs: static rate, in Mb/s
10751  * @vl: virtual lane
10752  * @dw_len: dword length (header words + data words + pbc words)
10753  *
10754  * Create a PBC with the given flags, rate, VL, and length.
10755  *
10756  * NOTE: The PBC created will not insert any HCRC - all callers but one are
10757  * for verbs, which does not use this PSM feature.  The lone other caller
10758  * is the diagnostic interface, which calls this if the user does not
10759  * supply their own PBC.
10760  */
10761 u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
10762                u32 dw_len)
10763 {
10764         u64 pbc, delay = 0;
10765
10766         if (unlikely(srate_mbs))
10767                 delay = delay_cycles(ppd, srate_mbs, dw_len);
10768
10769         pbc = flags
10770                 | (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT)
10771                 | ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
10772                 | (vl & PBC_VL_MASK) << PBC_VL_SHIFT
10773                 | (dw_len & PBC_LENGTH_DWS_MASK)
10774                         << PBC_LENGTH_DWS_SHIFT;
10775
10776         return pbc;
10777 }
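
      /*
       * Illustrative only (not part of the driver): a caller that needs no
       * static-rate throttling and no special flags might build the PBC for
       * a packet of "plen" dwords on VL 0 as:
       *
       *	pbc = create_pbc(ppd, 0, 0, 0, plen);
       *
       * With srate_mbs == 0 no delay is encoded, HCRC insertion stays
       * disabled (PBC_IHCRC_NONE), and only the VL and length fields are
       * filled in.
       */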
10778
10779 #define SBUS_THERMAL    0x4f
10780 #define SBUS_THERM_MONITOR_MODE 0x1
10781
10782 #define THERM_FAILURE(dev, ret, reason) \
10783         dd_dev_err((dev),                                               \
10784                    "Thermal sensor initialization failed: %s (%d)\n",   \
10785                    (reason), (ret))
10786
10787 /*
10788  * Initialize the Avago Thermal sensor.
10789  *
10790  * After initialization, enable polling of the thermal sensor through
10791  * the SBus interface. For this to work, the SBus Master firmware must
10792  * be loaded, because the HW polling logic uses SBus interrupts, which
10793  * the default firmware does not support. Otherwise, no data will be
10794  * returned through the ASIC_STS_THERM CSR.
10796  */
10797 static int thermal_init(struct hfi1_devdata *dd)
10798 {
10799         int ret = 0;
10800
10801         if (dd->icode != ICODE_RTL_SILICON ||
10802             !(dd->flags & HFI1_DO_INIT_ASIC))
10803                 return ret;
10804
10805         acquire_hw_mutex(dd);
10806         dd_dev_info(dd, "Initializing thermal sensor\n");
10807
10808         /* Thermal Sensor Initialization */
10809         /*    Step 1: Reset the Thermal SBus Receiver */
10810         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10811                                 RESET_SBUS_RECEIVER, 0);
10812         if (ret) {
10813                 THERM_FAILURE(dd, ret, "Bus Reset");
10814                 goto done;
10815         }
10816         /*    Step 2: Set Reset bit in Thermal block */
10817         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10818                                 WRITE_SBUS_RECEIVER, 0x1);
10819         if (ret) {
10820                 THERM_FAILURE(dd, ret, "Therm Block Reset");
10821                 goto done;
10822         }
10823         /*    Step 3: Write clock divider value (100MHz -> 2MHz) */
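              /*    (0x32 is 50 decimal; 100 MHz / 50 = 2 MHz, assuming a
               *     simple integer divider) */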
10824         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1,
10825                                 WRITE_SBUS_RECEIVER, 0x32);
10826         if (ret) {
10827                 THERM_FAILURE(dd, ret, "Write Clock Div");
10828                 goto done;
10829         }
10830         /*    Step 4: Select temperature mode */
10831         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3,
10832                                 WRITE_SBUS_RECEIVER,
10833                                 SBUS_THERM_MONITOR_MODE);
10834         if (ret) {
10835                 THERM_FAILURE(dd, ret, "Write Mode Sel");
10836                 goto done;
10837         }
10838         /*    Step 5: De-assert block reset and start conversion */
10839         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10840                                 WRITE_SBUS_RECEIVER, 0x2);
10841         if (ret) {
10842                 THERM_FAILURE(dd, ret, "Write Reset Deassert");
10843                 goto done;
10844         }
10845         /*    Step 5.1: Wait for first conversion (21.5ms per spec) */
10846         msleep(22);
10847
10848         /* Enable polling of thermal readings */
10849         write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
10850 done:
10851         release_hw_mutex(dd);
10852         return ret;
10853 }
10854
10855 static void handle_temp_err(struct hfi1_devdata *dd)
10856 {
10857         struct hfi1_pportdata *ppd = &dd->pport[0];
10858         /*
10859          * Thermal Critical Interrupt
10860          * Put the device into forced freeze mode, take link down to
10861          * offline, and put DC into reset.
10862          */
10863         dd_dev_emerg(dd,
10864                      "Critical temperature reached! Forcing device into freeze mode!\n");
10865         dd->flags |= HFI1_FORCED_FREEZE;
10866         start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
10867         /*
10868          * Shut DC down as much and as quickly as possible.
10869          *
10870          * Step 1: Take the link down to OFFLINE. This will cause the
10871          *         8051 to put the Serdes in reset. However, we don't want to
10872          *         go through the entire link state machine since we want to
10873          *         shut down ASAP. Furthermore, this is not a graceful shutdown
10874          *         but rather an attempt to save the chip.
10875          *         Code below is almost the same as quiet_serdes() but avoids
10876          *         all the extra work and the sleeps.
10877          */
10878         ppd->driver_link_ready = 0;
10879         ppd->link_enabled = 0;
10880         set_physical_link_state(dd, PLS_OFFLINE |
10881                                 (OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
10882         /*
10883          * Step 2: Shutdown LCB and 8051
10884          *         After shutdown, do not restore DC_CFG_RESET value.
10885          */
10886         dc_shutdown(dd);
10887 }