GNU Linux-libre 4.19.264-gnu1
[releases.git] / drivers / pci / pcie / err.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This file implements the error recovery as a core part of PCIe error
4  * reporting. When a PCIe error is delivered, an error message will be
5  * collected and printed to console, then, an error recovery procedure
6  * will be executed by following the PCI error recovery rules.
7  *
8  * Copyright (C) 2006 Intel Corp.
9  *      Tom Long Nguyen (tom.l.nguyen@intel.com)
10  *      Zhang Yanmin (yanmin.zhang@intel.com)
11  */
12
13 #include <linux/pci.h>
14 #include <linux/module.h>
15 #include <linux/pci.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/aer.h>
19 #include "portdrv.h"
20 #include "../pci.h"
21
22 struct aer_broadcast_data {
23         enum pci_channel_state state;
24         enum pci_ers_result result;
25 };
26
27 static pci_ers_result_t merge_result(enum pci_ers_result orig,
28                                   enum pci_ers_result new)
29 {
30         if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
31                 return PCI_ERS_RESULT_NO_AER_DRIVER;
32
33         if (new == PCI_ERS_RESULT_NONE)
34                 return orig;
35
36         switch (orig) {
37         case PCI_ERS_RESULT_CAN_RECOVER:
38         case PCI_ERS_RESULT_RECOVERED:
39                 orig = new;
40                 break;
41         case PCI_ERS_RESULT_DISCONNECT:
42                 if (new == PCI_ERS_RESULT_NEED_RESET)
43                         orig = PCI_ERS_RESULT_NEED_RESET;
44                 break;
45         default:
46                 break;
47         }
48
49         return orig;
50 }
51
52 static int report_error_detected(struct pci_dev *dev, void *data)
53 {
54         pci_ers_result_t vote;
55         const struct pci_error_handlers *err_handler;
56         struct aer_broadcast_data *result_data;
57
58         result_data = (struct aer_broadcast_data *) data;
59
60         device_lock(&dev->dev);
61         dev->error_state = result_data->state;
62
63         if (!dev->driver ||
64                 !dev->driver->err_handler ||
65                 !dev->driver->err_handler->error_detected) {
66                 /*
67                  * If any device in the subtree does not have an error_detected
68                  * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
69                  * error callbacks of "any" device in the subtree, and will
70                  * exit in the disconnected error state.
71                  */
72                 if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
73                         vote = PCI_ERS_RESULT_NO_AER_DRIVER;
74                 else
75                         vote = PCI_ERS_RESULT_NONE;
76         } else {
77                 err_handler = dev->driver->err_handler;
78                 vote = err_handler->error_detected(dev, result_data->state);
79                 pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
80         }
81
82         result_data->result = merge_result(result_data->result, vote);
83         device_unlock(&dev->dev);
84         return 0;
85 }
86
87 static int report_mmio_enabled(struct pci_dev *dev, void *data)
88 {
89         pci_ers_result_t vote;
90         const struct pci_error_handlers *err_handler;
91         struct aer_broadcast_data *result_data;
92
93         result_data = (struct aer_broadcast_data *) data;
94
95         device_lock(&dev->dev);
96         if (!dev->driver ||
97                 !dev->driver->err_handler ||
98                 !dev->driver->err_handler->mmio_enabled)
99                 goto out;
100
101         err_handler = dev->driver->err_handler;
102         vote = err_handler->mmio_enabled(dev);
103         result_data->result = merge_result(result_data->result, vote);
104 out:
105         device_unlock(&dev->dev);
106         return 0;
107 }
108
109 static int report_slot_reset(struct pci_dev *dev, void *data)
110 {
111         pci_ers_result_t vote;
112         const struct pci_error_handlers *err_handler;
113         struct aer_broadcast_data *result_data;
114
115         result_data = (struct aer_broadcast_data *) data;
116
117         device_lock(&dev->dev);
118         if (!dev->driver ||
119                 !dev->driver->err_handler ||
120                 !dev->driver->err_handler->slot_reset)
121                 goto out;
122
123         err_handler = dev->driver->err_handler;
124         vote = err_handler->slot_reset(dev);
125         result_data->result = merge_result(result_data->result, vote);
126 out:
127         device_unlock(&dev->dev);
128         return 0;
129 }
130
131 static int report_resume(struct pci_dev *dev, void *data)
132 {
133         const struct pci_error_handlers *err_handler;
134
135         device_lock(&dev->dev);
136         dev->error_state = pci_channel_io_normal;
137
138         if (!dev->driver ||
139                 !dev->driver->err_handler ||
140                 !dev->driver->err_handler->resume)
141                 goto out;
142
143         err_handler = dev->driver->err_handler;
144         err_handler->resume(dev);
145         pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
146 out:
147         device_unlock(&dev->dev);
148         return 0;
149 }
150
151 /**
152  * default_reset_link - default reset function
153  * @dev: pointer to pci_dev data structure
154  *
155  * Invoked when performing link reset on a Downstream Port or a
156  * Root Port with no aer driver.
157  */
158 static pci_ers_result_t default_reset_link(struct pci_dev *dev)
159 {
160         int rc;
161
162         rc = pci_bus_error_reset(dev);
163         pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
164         return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
165 }
166
167 static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
168 {
169         pci_ers_result_t status;
170         struct pcie_port_service_driver *driver = NULL;
171
172         driver = pcie_port_find_service(dev, service);
173         if (driver && driver->reset_link) {
174                 status = driver->reset_link(dev);
175         } else if (dev->has_secondary_link) {
176                 status = default_reset_link(dev);
177         } else {
178                 pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
179                         pci_name(dev));
180                 return PCI_ERS_RESULT_DISCONNECT;
181         }
182
183         if (status != PCI_ERS_RESULT_RECOVERED) {
184                 pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
185                         pci_name(dev));
186                 return PCI_ERS_RESULT_DISCONNECT;
187         }
188
189         return status;
190 }
191
192 /**
193  * broadcast_error_message - handle message broadcast to downstream drivers
194  * @dev: pointer to from where in a hierarchy message is broadcasted down
195  * @state: error state
196  * @error_mesg: message to print
197  * @cb: callback to be broadcasted
198  *
199  * Invoked during error recovery process. Once being invoked, the content
200  * of error severity will be broadcasted to all downstream drivers in a
201  * hierarchy in question.
202  */
203 static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
204         enum pci_channel_state state,
205         char *error_mesg,
206         int (*cb)(struct pci_dev *, void *))
207 {
208         struct aer_broadcast_data result_data;
209
210         pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
211         result_data.state = state;
212         if (cb == report_error_detected)
213                 result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
214         else
215                 result_data.result = PCI_ERS_RESULT_RECOVERED;
216
217         pci_walk_bus(dev->subordinate, cb, &result_data);
218         return result_data.result;
219 }
220
221 /**
222  * pcie_do_fatal_recovery - handle fatal error recovery process
223  * @dev: pointer to a pci_dev data structure of agent detecting an error
224  *
225  * Invoked when an error is fatal. Once being invoked, removes the devices
226  * beneath this AER agent, followed by reset link e.g. secondary bus reset
227  * followed by re-enumeration of devices.
228  */
229 void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
230 {
231         struct pci_dev *udev;
232         struct pci_bus *parent;
233         struct pci_dev *pdev, *temp;
234         pci_ers_result_t result;
235
236         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
237                 udev = dev;
238         else
239                 udev = dev->bus->self;
240
241         parent = udev->subordinate;
242         pci_lock_rescan_remove();
243         pci_dev_get(dev);
244         list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
245                                          bus_list) {
246                 pci_dev_get(pdev);
247                 pci_dev_set_disconnected(pdev, NULL);
248                 if (pci_has_subordinate(pdev))
249                         pci_walk_bus(pdev->subordinate,
250                                      pci_dev_set_disconnected, NULL);
251                 pci_stop_and_remove_bus_device(pdev);
252                 pci_dev_put(pdev);
253         }
254
255         result = reset_link(udev, service);
256
257         if ((service == PCIE_PORT_SERVICE_AER) &&
258             (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
259                 /*
260                  * If the error is reported by a bridge, we think this error
261                  * is related to the downstream link of the bridge, so we
262                  * do error recovery on all subordinates of the bridge instead
263                  * of the bridge and clear the error status of the bridge.
264                  */
265                 pci_aer_clear_fatal_status(dev);
266                 pci_aer_clear_device_status(dev);
267         }
268
269         if (result == PCI_ERS_RESULT_RECOVERED) {
270                 if (pcie_wait_for_link(udev, true))
271                         pci_rescan_bus(udev->bus);
272                 pci_info(dev, "Device recovery from fatal error successful\n");
273         } else {
274                 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
275                 pci_info(dev, "Device recovery from fatal error failed\n");
276         }
277
278         pci_dev_put(dev);
279         pci_unlock_rescan_remove();
280 }
281
282 /**
283  * pcie_do_nonfatal_recovery - handle nonfatal error recovery process
284  * @dev: pointer to a pci_dev data structure of agent detecting an error
285  *
286  * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
287  * error detected message to all downstream drivers within a hierarchy in
288  * question and return the returned code.
289  */
290 void pcie_do_nonfatal_recovery(struct pci_dev *dev)
291 {
292         pci_ers_result_t status;
293         enum pci_channel_state state;
294
295         state = pci_channel_io_normal;
296
297         /*
298          * Error recovery runs on all subordinates of the first downstream port.
299          * If the downstream port detected the error, it is cleared at the end.
300          */
301         if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
302               pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
303                 dev = dev->bus->self;
304
305         status = broadcast_error_message(dev,
306                         state,
307                         "error_detected",
308                         report_error_detected);
309
310         if (status == PCI_ERS_RESULT_CAN_RECOVER)
311                 status = broadcast_error_message(dev,
312                                 state,
313                                 "mmio_enabled",
314                                 report_mmio_enabled);
315
316         if (status == PCI_ERS_RESULT_NEED_RESET) {
317                 /*
318                  * TODO: Should call platform-specific
319                  * functions to reset slot before calling
320                  * drivers' slot_reset callbacks?
321                  */
322                 status = broadcast_error_message(dev,
323                                 state,
324                                 "slot_reset",
325                                 report_slot_reset);
326         }
327
328         if (status != PCI_ERS_RESULT_RECOVERED)
329                 goto failed;
330
331         broadcast_error_message(dev,
332                                 state,
333                                 "resume",
334                                 report_resume);
335
336         pci_aer_clear_device_status(dev);
337         pci_cleanup_aer_uncorrect_error_status(dev);
338         pci_info(dev, "AER: Device recovery successful\n");
339         return;
340
341 failed:
342         pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
343
344         /* TODO: Should kernel panic here? */
345         pci_info(dev, "AER: Device recovery failed\n");
346 }