GNU Linux-libre 4.19.286-gnu1
drivers/iommu/arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
58
59 /*
60  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
61  * global register space are still, in fact, using a hypervisor to mediate it
62  * by trapping and emulating register accesses. Sadly, some deployed versions
63  * of said trapping code have bugs wherein they go horribly wrong for stores
64  * using r31 (i.e. XZR/WZR) as the source register.
65  */
66 #define QCOM_DUMMY_VAL -1
67
68 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
69
70 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
71 #define ARM_MMU500_ACR_S2CRB_TLBEN      (1 << 10)
72 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
73
74 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
75 #define TLB_SPIN_COUNT                  10
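/*
 * __arm_smmu_tlb_sync() below spins on the status register TLB_SPIN_COUNT
 * times between back-off delays, doubling the delay on each outer pass, so
 * the total wait is bounded by roughly TLB_LOOP_TIMEOUT microseconds.
 */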
76
77 /* Maximum number of context banks per SMMU */
78 #define ARM_SMMU_MAX_CBS                128
79
80 /* SMMU global address space */
81 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
82 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
83
84 /*
85  * SMMU global address space with conditional offset to access secure
86  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
87  * nsGFSYNR0: 0x450)
88  */
89 #define ARM_SMMU_GR0_NS(smmu)                                           \
90         ((smmu)->base +                                                 \
91                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
92                         ? 0x400 : 0))
93
94 /*
95  * Some 64-bit registers only make sense to write atomically, but in such
96  * cases all the data relevant to AArch32 formats lies within the lower word,
97  * therefore this actually makes more sense than it might first appear.
98  */
99 #ifdef CONFIG_64BIT
100 #define smmu_write_atomic_lq            writeq_relaxed
101 #else
102 #define smmu_write_atomic_lq            writel_relaxed
103 #endif
104
105 /* Translation context bank */
106 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
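/*
 * Layout as used by this driver: the global register pages (GR0, GR1) sit at
 * the start of the mapping, while translation context banks are indexed from
 * cb_base in steps of one page (1 << pgshift). cb_base itself points at the
 * upper half of the SMMU's MMIO region and is set up by the probe code later
 * in this file (outside this excerpt).
 */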
107
108 #define MSI_IOVA_BASE                   0x8000000
109 #define MSI_IOVA_LENGTH                 0x100000
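/*
 * Software-chosen IOVA window advertised to the IOMMU core as a reserved
 * region for MSI doorbell mappings; consumed by the resv_regions handling
 * later in this file (outside this excerpt).
 */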
110
111 static int force_stage;
112 module_param(force_stage, int, S_IRUGO);
113 MODULE_PARM_DESC(force_stage,
114         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
115 static bool disable_bypass;
116 module_param(disable_bypass, bool, S_IRUGO);
117 MODULE_PARM_DESC(disable_bypass,
118         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
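/*
 * Both parameters can also be given on the kernel command line when the
 * driver is built in, e.g. "arm-smmu.force_stage=2 arm-smmu.disable_bypass=1"
 * (illustrative values; dashes and underscores are interchangeable in
 * parameter names).
 */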
119
120 enum arm_smmu_arch_version {
121         ARM_SMMU_V1,
122         ARM_SMMU_V1_64K,
123         ARM_SMMU_V2,
124 };
125
126 enum arm_smmu_implementation {
127         GENERIC_SMMU,
128         ARM_MMU500,
129         CAVIUM_SMMUV2,
130         QCOM_SMMUV2,
131 };
132
133 struct arm_smmu_s2cr {
134         struct iommu_group              *group;
135         int                             count;
136         enum arm_smmu_s2cr_type         type;
137         enum arm_smmu_s2cr_privcfg      privcfg;
138         u8                              cbndx;
139 };
140
141 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
142         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
143 }
144
145 struct arm_smmu_smr {
146         u16                             mask;
147         u16                             id;
148         bool                            valid;
149 };
150
151 struct arm_smmu_cb {
152         u64                             ttbr[2];
153         u32                             tcr[2];
154         u32                             mair[2];
155         struct arm_smmu_cfg             *cfg;
156 };
157
158 struct arm_smmu_master_cfg {
159         struct arm_smmu_device          *smmu;
160         s16                             smendx[];
161 };
162 #define INVALID_SMENDX                  -1
163 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
164 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
165 #define fwspec_smendx(fw, i) \
166         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
167 #define for_each_cfg_sme(fw, i, idx) \
168         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
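/*
 * Per-master state hangs off dev->iommu_fwspec->iommu_priv: smendx[i] records
 * which stream map entry backs the i-th stream ID of the fwspec. The loop
 * macro relies on the comma operator so that idx is refreshed on every
 * iteration, with fwspec_smendx() returning INVALID_SMENDX once i runs past
 * num_ids.
 */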
169
170 struct arm_smmu_device {
171         struct device                   *dev;
172
173         void __iomem                    *base;
174         void __iomem                    *cb_base;
175         unsigned long                   pgshift;
176
177 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
178 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
179 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
180 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
181 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
182 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
183 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
184 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
185 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
186 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
187 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
188 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
189 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
190         u32                             features;
191
192 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
193         u32                             options;
194         enum arm_smmu_arch_version      version;
195         enum arm_smmu_implementation    model;
196
197         u32                             num_context_banks;
198         u32                             num_s2_context_banks;
199         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
200         struct arm_smmu_cb              *cbs;
201         atomic_t                        irptndx;
202
203         u32                             num_mapping_groups;
204         u16                             streamid_mask;
205         u16                             smr_mask_mask;
206         struct arm_smmu_smr             *smrs;
207         struct arm_smmu_s2cr            *s2crs;
208         struct mutex                    stream_map_mutex;
209
210         unsigned long                   va_size;
211         unsigned long                   ipa_size;
212         unsigned long                   pa_size;
213         unsigned long                   pgsize_bitmap;
214
215         u32                             num_global_irqs;
216         u32                             num_context_irqs;
217         unsigned int                    *irqs;
218
219         u32                             cavium_id_base; /* Specific to Cavium */
220
221         spinlock_t                      global_sync_lock;
222
223         /* IOMMU core code handle */
224         struct iommu_device             iommu;
225 };
226
227 enum arm_smmu_context_fmt {
228         ARM_SMMU_CTX_FMT_NONE,
229         ARM_SMMU_CTX_FMT_AARCH64,
230         ARM_SMMU_CTX_FMT_AARCH32_L,
231         ARM_SMMU_CTX_FMT_AARCH32_S,
232 };
233
234 struct arm_smmu_cfg {
235         u8                              cbndx;
236         u8                              irptndx;
237         union {
238                 u16                     asid;
239                 u16                     vmid;
240         };
241         u32                             cbar;
242         enum arm_smmu_context_fmt       fmt;
243 };
244 #define INVALID_IRPTNDX                 0xff
245
246 enum arm_smmu_domain_stage {
247         ARM_SMMU_DOMAIN_S1 = 0,
248         ARM_SMMU_DOMAIN_S2,
249         ARM_SMMU_DOMAIN_NESTED,
250         ARM_SMMU_DOMAIN_BYPASS,
251 };
252
253 struct arm_smmu_domain {
254         struct arm_smmu_device          *smmu;
255         struct io_pgtable_ops           *pgtbl_ops;
256         const struct iommu_gather_ops   *tlb_ops;
257         struct arm_smmu_cfg             cfg;
258         enum arm_smmu_domain_stage      stage;
259         struct mutex                    init_mutex; /* Protects smmu pointer */
260         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
261         struct iommu_domain             domain;
262 };
263
264 struct arm_smmu_option_prop {
265         u32 opt;
266         const char *prop;
267 };
268
269 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
270
271 static bool using_legacy_binding, using_generic_binding;
272
273 static struct arm_smmu_option_prop arm_smmu_options[] = {
274         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
275         { 0, NULL},
276 };
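/*
 * Options are boolean properties on the SMMU's DT node, matched by
 * parse_driver_options() below; e.g. adding "calxeda,smmu-secure-config-access;"
 * to the node enables ARM_SMMU_OPT_SECURE_CFG_ACCESS.
 */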
277
278 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
279 {
280         return container_of(dom, struct arm_smmu_domain, domain);
281 }
282
283 static void parse_driver_options(struct arm_smmu_device *smmu)
284 {
285         int i = 0;
286
287         do {
288                 if (of_property_read_bool(smmu->dev->of_node,
289                                                 arm_smmu_options[i].prop)) {
290                         smmu->options |= arm_smmu_options[i].opt;
291                         dev_notice(smmu->dev, "option %s\n",
292                                 arm_smmu_options[i].prop);
293                 }
294         } while (arm_smmu_options[++i].opt);
295 }
296
297 static struct device_node *dev_get_dev_node(struct device *dev)
298 {
299         if (dev_is_pci(dev)) {
300                 struct pci_bus *bus = to_pci_dev(dev)->bus;
301
302                 while (!pci_is_root_bus(bus))
303                         bus = bus->parent;
304                 return of_node_get(bus->bridge->parent->of_node);
305         }
306
307         return of_node_get(dev->of_node);
308 }
309
310 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
311 {
312         *((__be32 *)data) = cpu_to_be32(alias);
313         return 0; /* Continue walking */
314 }
315
316 static int __find_legacy_master_phandle(struct device *dev, void *data)
317 {
318         struct of_phandle_iterator *it = *(void **)data;
319         struct device_node *np = it->node;
320         int err;
321
322         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
323                             "#stream-id-cells", 0)
324                 if (it->node == np) {
325                         *(void **)data = dev;
326                         return 1;
327                 }
328         it->node = np;
329         return err == -ENOENT ? 0 : err;
330 }
331
332 static struct platform_driver arm_smmu_driver;
333 static struct iommu_ops arm_smmu_ops;
334
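/*
 * Legacy (deprecated) DT binding: the SMMU node carries an "mmu-masters"
 * property listing <&master SID...> tuples, and each master node provides
 * "#stream-id-cells" to size its entry. A minimal sketch, with illustrative
 * node names and stream IDs:
 *
 *	smmu: iommu@40000000 {
 *		...
 *		mmu-masters = <&dma0 0xd01d 0xd01e>,
 *			      <&gpu0 0xd11c>;
 *	};
 *
 * arm_smmu_register_legacy_master() below walks this property backwards from
 * a master device to discover its SMMU and stream IDs.
 */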
335 static int arm_smmu_register_legacy_master(struct device *dev,
336                                            struct arm_smmu_device **smmu)
337 {
338         struct device *smmu_dev;
339         struct device_node *np;
340         struct of_phandle_iterator it;
341         void *data = &it;
342         u32 *sids;
343         __be32 pci_sid;
344         int err;
345
346         np = dev_get_dev_node(dev);
347         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
348                 of_node_put(np);
349                 return -ENODEV;
350         }
351
352         it.node = np;
353         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
354                                      __find_legacy_master_phandle);
355         smmu_dev = data;
356         of_node_put(np);
357         if (err == 0)
358                 return -ENODEV;
359         if (err < 0)
360                 return err;
361
362         if (dev_is_pci(dev)) {
363                 /* "mmu-masters" assumes Stream ID == Requester ID */
364                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
365                                        &pci_sid);
366                 it.cur = &pci_sid;
367                 it.cur_count = 1;
368         }
369
370         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
371                                 &arm_smmu_ops);
372         if (err)
373                 return err;
374
375         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
376         if (!sids)
377                 return -ENOMEM;
378
379         *smmu = dev_get_drvdata(smmu_dev);
380         of_phandle_iterator_args(&it, sids, it.cur_count);
381         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
382         kfree(sids);
383         return err;
384 }
385
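/*
 * Lock-free bitmap allocator (used for context banks): find a clear bit and
 * claim it with test_and_set_bit(), retrying if another caller raced us to
 * the same index.
 */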
386 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
387 {
388         int idx;
389
390         do {
391                 idx = find_next_zero_bit(map, end, start);
392                 if (idx == end)
393                         return -ENOSPC;
394         } while (test_and_set_bit(idx, map));
395
396         return idx;
397 }
398
399 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
400 {
401         clear_bit(idx, map);
402 }
403
404 /* Wait for any pending TLB invalidations to complete */
405 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
406                                 void __iomem *sync, void __iomem *status)
407 {
408         unsigned int spin_cnt, delay;
409
410         writel_relaxed(QCOM_DUMMY_VAL, sync);
411         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
412                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
413                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
414                                 return;
415                         cpu_relax();
416                 }
417                 udelay(delay);
418         }
419         dev_err_ratelimited(smmu->dev,
420                             "TLB sync timed out -- SMMU may be deadlocked\n");
421 }
422
423 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
424 {
425         void __iomem *base = ARM_SMMU_GR0(smmu);
426         unsigned long flags;
427
428         spin_lock_irqsave(&smmu->global_sync_lock, flags);
429         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
430                             base + ARM_SMMU_GR0_sTLBGSTATUS);
431         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
432 }
433
434 static void arm_smmu_tlb_sync_context(void *cookie)
435 {
436         struct arm_smmu_domain *smmu_domain = cookie;
437         struct arm_smmu_device *smmu = smmu_domain->smmu;
438         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
439         unsigned long flags;
440
441         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
442         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
443                             base + ARM_SMMU_CB_TLBSTATUS);
444         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
445 }
446
447 static void arm_smmu_tlb_sync_vmid(void *cookie)
448 {
449         struct arm_smmu_domain *smmu_domain = cookie;
450
451         arm_smmu_tlb_sync_global(smmu_domain->smmu);
452 }
453
454 static void arm_smmu_tlb_inv_context_s1(void *cookie)
455 {
456         struct arm_smmu_domain *smmu_domain = cookie;
457         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
458         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
459
460         writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
461         arm_smmu_tlb_sync_context(cookie);
462 }
463
464 static void arm_smmu_tlb_inv_context_s2(void *cookie)
465 {
466         struct arm_smmu_domain *smmu_domain = cookie;
467         struct arm_smmu_device *smmu = smmu_domain->smmu;
468         void __iomem *base = ARM_SMMU_GR0(smmu);
469
470         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
471         arm_smmu_tlb_sync_global(smmu);
472 }
473
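/*
 * Per-range invalidation. The register encoding differs by context format:
 * AArch32-format banks take VA[31:12] with the ASID in the low bits as a
 * 32-bit write, AArch64-format banks take VA >> 12 with the ASID in bits
 * [63:48] as a 64-bit write, and stage-2 banks take IPA >> 12.
 */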
474 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
475                                           size_t granule, bool leaf, void *cookie)
476 {
477         struct arm_smmu_domain *smmu_domain = cookie;
478         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
479         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
480         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
481
482         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
483                 wmb();
484
485         if (stage1) {
486                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
487
488                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
489                         iova &= ~0xfffUL;       /* clear page-offset bits [11:0] before merging in the ASID */
490                         iova |= cfg->asid;
491                         do {
492                                 writel_relaxed(iova, reg);
493                                 iova += granule;
494                         } while (size -= granule);
495                 } else {
496                         iova >>= 12;
497                         iova |= (u64)cfg->asid << 48;
498                         do {
499                                 writeq_relaxed(iova, reg);
500                                 iova += granule >> 12;
501                         } while (size -= granule);
502                 }
503         } else {
504                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
505                               ARM_SMMU_CB_S2_TLBIIPAS2;
506                 iova >>= 12;
507                 do {
508                         smmu_write_atomic_lq(iova, reg);
509                         iova += granule >> 12;
510                 } while (size -= granule);
511         }
512 }
513
514 /*
515  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
516  * almost negligible, but the benefit of getting the first one in as far ahead
517  * of the sync as possible is significant, hence we don't just make this a
518  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
519  */
520 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
521                                          size_t granule, bool leaf, void *cookie)
522 {
523         struct arm_smmu_domain *smmu_domain = cookie;
524         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
525
526         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
527                 wmb();
528
529         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
530 }
531
532 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
533         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
534         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
535         .tlb_sync       = arm_smmu_tlb_sync_context,
536 };
537
538 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
539         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
540         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
541         .tlb_sync       = arm_smmu_tlb_sync_context,
542 };
543
544 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
545         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
546         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
547         .tlb_sync       = arm_smmu_tlb_sync_vmid,
548 };
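/*
 * Which set gets used: stage-1 domains always take arm_smmu_s1_tlb_ops;
 * stage-2 domains use the per-context TLBSYNC variant on SMMUv2, and fall
 * back to TLBIVMID with a global sync on earlier revisions (see
 * arm_smmu_init_domain_context() below).
 */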
549
550 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
551 {
552         u32 fsr, fsynr;
553         unsigned long iova;
554         struct iommu_domain *domain = dev;
555         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
556         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
557         struct arm_smmu_device *smmu = smmu_domain->smmu;
558         void __iomem *cb_base;
559
560         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
561         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
562
563         if (!(fsr & FSR_FAULT))
564                 return IRQ_NONE;
565
566         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
567         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
568
569         dev_err_ratelimited(smmu->dev,
570         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
571                             fsr, iova, fsynr, cfg->cbndx);
572
573         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
574         return IRQ_HANDLED;
575 }
576
577 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
578 {
579         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
580         struct arm_smmu_device *smmu = dev;
581         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
582
583         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
584         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
585         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
586         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
587
588         if (!gfsr)
589                 return IRQ_NONE;
590
591         dev_err_ratelimited(smmu->dev,
592                 "Unexpected global fault, this could be serious\n");
593         dev_err_ratelimited(smmu->dev,
594                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
595                 gfsr, gfsynr0, gfsynr1, gfsynr2);
596
597         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
598         return IRQ_HANDLED;
599 }
600
601 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
602                                        struct io_pgtable_cfg *pgtbl_cfg)
603 {
604         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
605         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
606         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
607
608         cb->cfg = cfg;
609
610         /* TTBCR */
611         if (stage1) {
612                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
613                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
614                 } else {
615                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
616                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
617                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
618                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
619                                 cb->tcr[1] |= TTBCR2_AS;
620                 }
621         } else {
622                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
623         }
624
625         /* TTBRs */
626         if (stage1) {
627                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
628                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
629                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
630                 } else {
631                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
632                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
633                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
634                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
635                 }
636         } else {
637                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
638         }
639
640         /* MAIRs (stage-1 only) */
641         if (stage1) {
642                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
643                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
644                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
645                 } else {
646                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
647                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
648                 }
649         }
650 }
651
652 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
653 {
654         u32 reg;
655         bool stage1;
656         struct arm_smmu_cb *cb = &smmu->cbs[idx];
657         struct arm_smmu_cfg *cfg = cb->cfg;
658         void __iomem *cb_base, *gr1_base;
659
660         cb_base = ARM_SMMU_CB(smmu, idx);
661
662         /* Unassigned context banks only need disabling */
663         if (!cfg) {
664                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
665                 return;
666         }
667
668         gr1_base = ARM_SMMU_GR1(smmu);
669         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
670
671         /* CBA2R */
672         if (smmu->version > ARM_SMMU_V1) {
673                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
674                         reg = CBA2R_RW64_64BIT;
675                 else
676                         reg = CBA2R_RW64_32BIT;
677                 /* 16-bit VMIDs live in CBA2R */
678                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
679                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
680
681                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
682         }
683
684         /* CBAR */
685         reg = cfg->cbar;
686         if (smmu->version < ARM_SMMU_V2)
687                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
688
689         /*
690          * Use the weakest shareability/memory types, so they are
691          * overridden by the ttbcr/pte.
692          */
693         if (stage1) {
694                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
695                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
696         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
697                 /* 8-bit VMIDs live in CBAR */
698                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
699         }
700         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
701
702         /*
703          * TTBCR
704          * We must write this before the TTBRs, since it determines the
705          * access behaviour of some fields (in particular, ASID[15:8]).
706          */
707         if (stage1 && smmu->version > ARM_SMMU_V1)
708                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
709         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
710
711         /* TTBRs */
712         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
713                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
714                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
715                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
716         } else {
717                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
718                 if (stage1)
719                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
720         }
721
722         /* MAIRs (stage-1 only) */
723         if (stage1) {
724                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
725                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
726         }
727
728         /* SCTLR */
729         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
730         if (stage1)
731                 reg |= SCTLR_S1_ASIDPNE;
732         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
733                 reg |= SCTLR_E;
734
735         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
736 }
737
738 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
739                                         struct arm_smmu_device *smmu)
740 {
741         int irq, start, ret = 0;
742         unsigned long ias, oas;
743         struct io_pgtable_ops *pgtbl_ops;
744         struct io_pgtable_cfg pgtbl_cfg;
745         enum io_pgtable_fmt fmt;
746         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
747         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
748
749         mutex_lock(&smmu_domain->init_mutex);
750         if (smmu_domain->smmu)
751                 goto out_unlock;
752
753         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
754                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
755                 smmu_domain->smmu = smmu;
756                 goto out_unlock;
757         }
758
759         /*
760          * Mapping the requested stage onto what we support is surprisingly
761          * complicated, mainly because the spec allows S1+S2 SMMUs without
762          * support for nested translation. That means we end up with the
763          * following table:
764          *
765          * Requested        Supported        Actual
766          *     S1               N              S1
767          *     S1             S1+S2            S1
768          *     S1               S2             S2
769          *     S1               S1             S1
770          *     N                N              N
771          *     N              S1+S2            S2
772          *     N                S2             S2
773          *     N                S1             S1
774          *
775          * Note that you can't actually request stage-2 mappings.
776          */
777         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
778                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
779         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
780                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
781
782         /*
783          * Choosing a suitable context format is even more fiddly. Until we
784          * grow some way for the caller to express a preference, and/or move
785          * the decision into the io-pgtable code where it arguably belongs,
786          * just aim for the closest thing to the rest of the system, and hope
787          * that the hardware isn't esoteric enough that we can't assume AArch64
788          * support to be a superset of AArch32 support...
789          */
790         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
791                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
792         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
793             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
794             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
795             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
796                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
797         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
798             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
799                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
800                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
801                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
802
803         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
804                 ret = -EINVAL;
805                 goto out_unlock;
806         }
807
808         switch (smmu_domain->stage) {
809         case ARM_SMMU_DOMAIN_S1:
810                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
811                 start = smmu->num_s2_context_banks;
812                 ias = smmu->va_size;
813                 oas = smmu->ipa_size;
814                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
815                         fmt = ARM_64_LPAE_S1;
816                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
817                         fmt = ARM_32_LPAE_S1;
818                         ias = min(ias, 32UL);
819                         oas = min(oas, 40UL);
820                 } else {
821                         fmt = ARM_V7S;
822                         ias = min(ias, 32UL);
823                         oas = min(oas, 32UL);
824                 }
825                 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
826                 break;
827         case ARM_SMMU_DOMAIN_NESTED:
828                 /*
829                  * We will likely want to change this if/when KVM gets
830                  * involved.
831                  */
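                /* fallthrough - nested domains are handled as plain stage 2 for now */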
832         case ARM_SMMU_DOMAIN_S2:
833                 cfg->cbar = CBAR_TYPE_S2_TRANS;
834                 start = 0;
835                 ias = smmu->ipa_size;
836                 oas = smmu->pa_size;
837                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
838                         fmt = ARM_64_LPAE_S2;
839                 } else {
840                         fmt = ARM_32_LPAE_S2;
841                         ias = min(ias, 40UL);
842                         oas = min(oas, 40UL);
843                 }
844                 if (smmu->version == ARM_SMMU_V2)
845                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
846                 else
847                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
848                 break;
849         default:
850                 ret = -EINVAL;
851                 goto out_unlock;
852         }
853         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
854                                       smmu->num_context_banks);
855         if (ret < 0)
856                 goto out_unlock;
857
858         cfg->cbndx = ret;
859         if (smmu->version < ARM_SMMU_V2) {
860                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
861                 cfg->irptndx %= smmu->num_context_irqs;
862         } else {
863                 cfg->irptndx = cfg->cbndx;
864         }
865
866         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
867                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
868         else
869                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
870
871         pgtbl_cfg = (struct io_pgtable_cfg) {
872                 .pgsize_bitmap  = smmu->pgsize_bitmap,
873                 .ias            = ias,
874                 .oas            = oas,
875                 .tlb            = smmu_domain->tlb_ops,
876                 .iommu_dev      = smmu->dev,
877         };
878
879         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
880                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
881
882         smmu_domain->smmu = smmu;
883         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
884         if (!pgtbl_ops) {
885                 ret = -ENOMEM;
886                 goto out_clear_smmu;
887         }
888
889         /* Update the domain's page sizes to reflect the page table format */
890         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
891         domain->geometry.aperture_end = (1UL << ias) - 1;
892         domain->geometry.force_aperture = true;
893
894         /* Initialise the context bank with our page table cfg */
895         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
896         arm_smmu_write_context_bank(smmu, cfg->cbndx);
897
898         /*
899          * Request context fault interrupt. Do this last to avoid the
900          * handler seeing a half-initialised domain state.
901          */
902         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
903         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
904                                IRQF_SHARED, "arm-smmu-context-fault", domain);
905         if (ret < 0) {
906                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
907                         cfg->irptndx, irq);
908                 cfg->irptndx = INVALID_IRPTNDX;
909         }
910
911         mutex_unlock(&smmu_domain->init_mutex);
912
913         /* Publish page table ops for map/unmap */
914         smmu_domain->pgtbl_ops = pgtbl_ops;
915         return 0;
916
917 out_clear_smmu:
918         smmu_domain->smmu = NULL;
919 out_unlock:
920         mutex_unlock(&smmu_domain->init_mutex);
921         return ret;
922 }
923
924 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
925 {
926         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
927         struct arm_smmu_device *smmu = smmu_domain->smmu;
928         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
929         int irq;
930
931         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
932                 return;
933
934         /*
935          * Disable the context bank and free the page tables before freeing
936          * it.
937          */
938         smmu->cbs[cfg->cbndx].cfg = NULL;
939         arm_smmu_write_context_bank(smmu, cfg->cbndx);
940
941         if (cfg->irptndx != INVALID_IRPTNDX) {
942                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
943                 devm_free_irq(smmu->dev, irq, domain);
944         }
945
946         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
947         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
948 }
949
950 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
951 {
952         struct arm_smmu_domain *smmu_domain;
953
954         if (type != IOMMU_DOMAIN_UNMANAGED &&
955             type != IOMMU_DOMAIN_DMA &&
956             type != IOMMU_DOMAIN_IDENTITY)
957                 return NULL;
958         /*
959          * Allocate the domain and initialise some of its data structures.
960          * We can't really do anything meaningful until we've added a
961          * master.
962          */
963         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
964         if (!smmu_domain)
965                 return NULL;
966
967         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
968             iommu_get_dma_cookie(&smmu_domain->domain))) {
969                 kfree(smmu_domain);
970                 return NULL;
971         }
972
973         mutex_init(&smmu_domain->init_mutex);
974         spin_lock_init(&smmu_domain->cb_lock);
975
976         return &smmu_domain->domain;
977 }
978
979 static void arm_smmu_domain_free(struct iommu_domain *domain)
980 {
981         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
982
983         /*
984          * Free the domain resources. We assume that all devices have
985          * already been detached.
986          */
987         iommu_put_dma_cookie(domain);
988         arm_smmu_destroy_domain_context(domain);
989         kfree(smmu_domain);
990 }
991
992 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
993 {
994         struct arm_smmu_smr *smr = smmu->smrs + idx;
995         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
996
997         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
998                 reg |= SMR_VALID;
999         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1000 }
1001
1002 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1003 {
1004         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1005         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1006                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1007                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1008
1009         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1010             smmu->smrs[idx].valid)
1011                 reg |= S2CR_EXIDVALID;
1012         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1013 }
1014
1015 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1016 {
1017         arm_smmu_write_s2cr(smmu, idx);
1018         if (smmu->smrs)
1019                 arm_smmu_write_smr(smmu, idx);
1020 }
1021
1022 /*
1023  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1024  * should be called after sCR0 is written.
1025  */
1026 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1027 {
1028         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1029         u32 smr;
1030
1031         if (!smmu->smrs)
1032                 return;
1033
1034         /*
1035          * SMR.ID bits may not be preserved if the corresponding MASK
1036          * bits are set, so check each one separately. We can reject
1037          * masters later if they try to claim IDs outside these masks.
1038          */
1039         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1040         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1041         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1042         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1043
1044         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1045         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1046         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1047         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1048 }
1049
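/*
 * Stream matching: an incoming stream ID hits SMR[i] when
 * (ID ^ SMR.ID) & ~SMR.MASK == 0, i.e. masked bits are don't-cares. The
 * checks below apply the same relation to decide whether a new (id, mask)
 * pair can reuse an existing entry, must be rejected, or may claim a free
 * one.
 */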
1050 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1051 {
1052         struct arm_smmu_smr *smrs = smmu->smrs;
1053         int i, free_idx = -ENOSPC;
1054
1055         /* Stream indexing is blissfully easy */
1056         if (!smrs)
1057                 return id;
1058
1059         /* Validating SMRs is... less so */
1060         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1061                 if (!smrs[i].valid) {
1062                         /*
1063                          * Note the first free entry we come across, which
1064                          * we'll claim in the end if nothing else matches.
1065                          */
1066                         if (free_idx < 0)
1067                                 free_idx = i;
1068                         continue;
1069                 }
1070                 /*
1071                  * If the new entry is _entirely_ matched by an existing entry,
1072                  * then reuse that, with the guarantee that there also cannot
1073                  * be any subsequent conflicting entries. In normal use we'd
1074                  * expect simply identical entries for this case, but there's
1075                  * no harm in accommodating the generalisation.
1076                  */
1077                 if ((mask & smrs[i].mask) == mask &&
1078                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1079                         return i;
1080                 /*
1081                  * If the new entry has any other overlap with an existing one,
1082                  * though, then there always exists at least one stream ID
1083                  * which would cause a conflict, and we can't allow that risk.
1084                  */
1085                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1086                         return -EINVAL;
1087         }
1088
1089         return free_idx;
1090 }
1091
1092 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1093 {
1094         if (--smmu->s2crs[idx].count)
1095                 return false;
1096
1097         smmu->s2crs[idx] = s2cr_init_val;
1098         if (smmu->smrs)
1099                 smmu->smrs[idx].valid = false;
1100
1101         return true;
1102 }
1103
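/*
 * fwspec->ids[] packs the SMR mask into the upper 16 bits above the stream
 * ID (as encoded by arm_smmu_of_xlate() further down), hence the
 * ">> SMR_MASK_SHIFT" extraction below.
 */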
1104 static int arm_smmu_master_alloc_smes(struct device *dev)
1105 {
1106         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1107         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1108         struct arm_smmu_device *smmu = cfg->smmu;
1109         struct arm_smmu_smr *smrs = smmu->smrs;
1110         struct iommu_group *group;
1111         int i, idx, ret;
1112
1113         mutex_lock(&smmu->stream_map_mutex);
1114         /* Figure out a viable stream map entry allocation */
1115         for_each_cfg_sme(fwspec, i, idx) {
1116                 u16 sid = fwspec->ids[i];
1117                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1118
1119                 if (idx != INVALID_SMENDX) {
1120                         ret = -EEXIST;
1121                         goto out_err;
1122                 }
1123
1124                 ret = arm_smmu_find_sme(smmu, sid, mask);
1125                 if (ret < 0)
1126                         goto out_err;
1127
1128                 idx = ret;
1129                 if (smrs && smmu->s2crs[idx].count == 0) {
1130                         smrs[idx].id = sid;
1131                         smrs[idx].mask = mask;
1132                         smrs[idx].valid = true;
1133                 }
1134                 smmu->s2crs[idx].count++;
1135                 cfg->smendx[i] = (s16)idx;
1136         }
1137
1138         group = iommu_group_get_for_dev(dev);
1139         if (!group)
1140                 group = ERR_PTR(-ENOMEM);
1141         if (IS_ERR(group)) {
1142                 ret = PTR_ERR(group);
1143                 goto out_err;
1144         }
1145         iommu_group_put(group);
1146
1147         /* It worked! Now, poke the actual hardware */
1148         for_each_cfg_sme(fwspec, i, idx) {
1149                 arm_smmu_write_sme(smmu, idx);
1150                 smmu->s2crs[idx].group = group;
1151         }
1152
1153         mutex_unlock(&smmu->stream_map_mutex);
1154         return 0;
1155
1156 out_err:
1157         while (i--) {
1158                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1159                 cfg->smendx[i] = INVALID_SMENDX;
1160         }
1161         mutex_unlock(&smmu->stream_map_mutex);
1162         return ret;
1163 }
1164
1165 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1166 {
1167         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1168         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1169         int i, idx;
1170
1171         mutex_lock(&smmu->stream_map_mutex);
1172         for_each_cfg_sme(fwspec, i, idx) {
1173                 if (arm_smmu_free_sme(smmu, idx))
1174                         arm_smmu_write_sme(smmu, idx);
1175                 cfg->smendx[i] = INVALID_SMENDX;
1176         }
1177         mutex_unlock(&smmu->stream_map_mutex);
1178 }
1179
1180 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1181                                       struct iommu_fwspec *fwspec)
1182 {
1183         struct arm_smmu_device *smmu = smmu_domain->smmu;
1184         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1185         u8 cbndx = smmu_domain->cfg.cbndx;
1186         enum arm_smmu_s2cr_type type;
1187         int i, idx;
1188
1189         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1190                 type = S2CR_TYPE_BYPASS;
1191         else
1192                 type = S2CR_TYPE_TRANS;
1193
1194         for_each_cfg_sme(fwspec, i, idx) {
1195                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1196                         continue;
1197
1198                 s2cr[idx].type = type;
1199                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1200                 s2cr[idx].cbndx = cbndx;
1201                 arm_smmu_write_s2cr(smmu, idx);
1202         }
1203         return 0;
1204 }
1205
1206 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1207 {
1208         int ret;
1209         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1210         struct arm_smmu_device *smmu;
1211         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1212
1213         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1214                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1215                 return -ENXIO;
1216         }
1217
1218         /*
1219          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1220          * domains between of_xlate() and add_device() - we have no way to cope
1221          * with that, so until ARM gets converted to rely on groups and default
1222          * domains, just say no (but more politely than by dereferencing NULL).
1223          * This should be at least a WARN_ON once that's sorted.
1224          */
1225         if (!fwspec->iommu_priv)
1226                 return -ENODEV;
1227
1228         smmu = fwspec_smmu(fwspec);
1229         /* Ensure that the domain is finalised */
1230         ret = arm_smmu_init_domain_context(domain, smmu);
1231         if (ret < 0)
1232                 return ret;
1233
1234         /*
1235          * Sanity check the domain. We don't support domains across
1236          * different SMMUs.
1237          */
1238         if (smmu_domain->smmu != smmu) {
1239                 dev_err(dev,
1240                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1241                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1242                 return -EINVAL;
1243         }
1244
1245         /* Looks ok, so add the device to the domain */
1246         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1247 }
1248
1249 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1250                         phys_addr_t paddr, size_t size, int prot)
1251 {
1252         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1253
1254         if (!ops)
1255                 return -ENODEV;
1256
1257         return ops->map(ops, iova, paddr, size, prot);
1258 }
1259
1260 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1261                              size_t size)
1262 {
1263         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1264
1265         if (!ops)
1266                 return 0;
1267
1268         return ops->unmap(ops, iova, size);
1269 }
1270
1271 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1272 {
1273         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1274
1275         if (smmu_domain->tlb_ops)
1276                 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1277 }
1278
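/*
 * Hardware-assisted lookup: write the VA to the ATS1PR register, poll ATSR
 * until the translation completes, then read the result (or fault flag) back
 * from PAR, falling back to a software table walk if the hardware times out.
 */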
1279 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1280                                               dma_addr_t iova)
1281 {
1282         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1283         struct arm_smmu_device *smmu = smmu_domain->smmu;
1284         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1285         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1286         struct device *dev = smmu->dev;
1287         void __iomem *cb_base;
1288         u32 tmp;
1289         u64 phys;
1290         unsigned long va, flags;
1291
1292         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1293
1294         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1295         /* ATS1 registers can only be written atomically */
1296         va = iova & ~0xfffUL;
1297         if (smmu->version == ARM_SMMU_V2)
1298                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1299         else /* Register is only 32-bit in v1 */
1300                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1301
1302         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1303                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1304                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1305                 dev_err(dev,
1306                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1307                         &iova);
1308                 return ops->iova_to_phys(ops, iova);
1309         }
1310
1311         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1312         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1313         if (phys & CB_PAR_F) {
1314                 dev_err(dev, "translation fault!\n");
1315                 dev_err(dev, "PAR = 0x%llx\n", phys);
1316                 return 0;
1317         }
1318
1319         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1320 }
1321
1322 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1323                                         dma_addr_t iova)
1324 {
1325         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1326         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1327
1328         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1329                 return iova;
1330
1331         if (!ops)
1332                 return 0;
1333
1334         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1335                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1336                 return arm_smmu_iova_to_phys_hard(domain, iova);
1337
1338         return ops->iova_to_phys(ops, iova);
1339 }
1340
1341 static bool arm_smmu_capable(enum iommu_cap cap)
1342 {
1343         switch (cap) {
1344         case IOMMU_CAP_CACHE_COHERENCY:
1345                 /*
1346                  * Return true here as the SMMU can always send out coherent
1347                  * requests.
1348                  */
1349                 return true;
1350         case IOMMU_CAP_NOEXEC:
1351                 return true;
1352         default:
1353                 return false;
1354         }
1355 }
1356
1357 static int arm_smmu_match_node(struct device *dev, void *data)
1358 {
1359         return dev->fwnode == data;
1360 }
1361
1362 static
1363 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1364 {
1365         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1366                                                 fwnode, arm_smmu_match_node);
1367         put_device(dev);
1368         return dev ? dev_get_drvdata(dev) : NULL;
1369 }
1370
1371 static int arm_smmu_add_device(struct device *dev)
1372 {
1373         struct arm_smmu_device *smmu;
1374         struct arm_smmu_master_cfg *cfg;
1375         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1376         int i, ret;
1377
1378         if (using_legacy_binding) {
1379                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1380
1381                 /*
1382                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1383                  * will allocate/initialise a new one. Thus we need to update fwspec for
1384                  * later use.
1385                  */
1386                 fwspec = dev->iommu_fwspec;
1387                 if (ret)
1388                         goto out_free;
1389         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1390                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1391         } else {
1392                 return -ENODEV;
1393         }
1394
1395         ret = -EINVAL;
1396         for (i = 0; i < fwspec->num_ids; i++) {
1397                 u16 sid = fwspec->ids[i];
1398                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1399
1400                 if (sid & ~smmu->streamid_mask) {
1401                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1402                                 sid, smmu->streamid_mask);
1403                         goto out_free;
1404                 }
1405                 if (mask & ~smmu->smr_mask_mask) {
1406                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1407                                 mask, smmu->smr_mask_mask);
1408                         goto out_free;
1409                 }
1410         }
1411
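        /*
         * Here i == fwspec->num_ids, so the offsetof() below sizes the
         * flexible smendx[] array with one slot per stream ID, and the
         * while (i--) loop then initialises every slot to INVALID_SMENDX.
         */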
1412         ret = -ENOMEM;
1413         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1414                       GFP_KERNEL);
1415         if (!cfg)
1416                 goto out_free;
1417
1418         cfg->smmu = smmu;
1419         fwspec->iommu_priv = cfg;
1420         while (i--)
1421                 cfg->smendx[i] = INVALID_SMENDX;
1422
1423         ret = arm_smmu_master_alloc_smes(dev);
1424         if (ret)
1425                 goto out_cfg_free;
1426
1427         iommu_device_link(&smmu->iommu, dev);
1428
1429         return 0;
1430
1431 out_cfg_free:
1432         kfree(cfg);
1433 out_free:
1434         iommu_fwspec_free(dev);
1435         return ret;
1436 }
1437
1438 static void arm_smmu_remove_device(struct device *dev)
1439 {
1440         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1441         struct arm_smmu_master_cfg *cfg;
1442         struct arm_smmu_device *smmu;
1443
1444
1445         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1446                 return;
1447
1448         cfg  = fwspec->iommu_priv;
1449         smmu = cfg->smmu;
1450
1451         iommu_device_unlink(&smmu->iommu, dev);
1452         arm_smmu_master_free_smes(fwspec);
1453         iommu_group_remove_device(dev);
1454         kfree(fwspec->iommu_priv);
1455         iommu_fwspec_free(dev);
1456 }
1457
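/*
 * Masters that share a stream mapping entry must also share an IOMMU
 * group: reuse any group already recorded against this device's S2CR
 * indexes, and reject the device if its entries point at conflicting
 * groups.
 */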
1458 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1459 {
1460         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1461         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1462         struct iommu_group *group = NULL;
1463         int i, idx;
1464
1465         for_each_cfg_sme(fwspec, i, idx) {
1466                 if (group && smmu->s2crs[idx].group &&
1467                     group != smmu->s2crs[idx].group)
1468                         return ERR_PTR(-EINVAL);
1469
1470                 group = smmu->s2crs[idx].group;
1471         }
1472
1473         if (group)
1474                 return iommu_group_ref_get(group);
1475
1476         if (dev_is_pci(dev))
1477                 group = pci_device_group(dev);
1478         else
1479                 group = generic_device_group(dev);
1480
1481         return group;
1482 }
1483
1484 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1485                                     enum iommu_attr attr, void *data)
1486 {
1487         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1488
1489         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1490                 return -EINVAL;
1491
1492         switch (attr) {
1493         case DOMAIN_ATTR_NESTING:
1494                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1495                 return 0;
1496         default:
1497                 return -ENODEV;
1498         }
1499 }
1500
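/*
 * The only attribute handled here is DOMAIN_ATTR_NESTING, and the
 * translation stage may only be changed before the domain is attached
 * to an SMMU (i.e. while smmu_domain->smmu is still NULL). A caller
 * such as VFIO would typically request nesting with something like
 * (illustrative only, not code from this driver):
 *
 *	int nesting = 1;
 *	iommu_domain_set_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
 */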
1501 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1502                                     enum iommu_attr attr, void *data)
1503 {
1504         int ret = 0;
1505         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1506
1507         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1508                 return -EINVAL;
1509
1510         mutex_lock(&smmu_domain->init_mutex);
1511
1512         switch (attr) {
1513         case DOMAIN_ATTR_NESTING:
1514                 if (smmu_domain->smmu) {
1515                         ret = -EPERM;
1516                         goto out_unlock;
1517                 }
1518
1519                 if (*(int *)data)
1520                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1521                 else
1522                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1523
1524                 break;
1525         default:
1526                 ret = -ENODEV;
1527         }
1528
1529 out_unlock:
1530         mutex_unlock(&smmu_domain->init_mutex);
1531         return ret;
1532 }
1533
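/*
 * Translate an "iommus" specifier into a single 32-bit firmware ID:
 * cell 0 provides the stream ID in the low 16 bits, and either cell 1
 * or the SMMU node's "stream-match-mask" property supplies the SMR
 * mask in the upper bits (SMR_MASK_SHIFT). As an illustration only
 * (assuming a two-cell specifier, not a fragment from this tree), a
 * master described with
 *
 *	iommus = <&smmu 0x400 0x3f>;
 *
 * would register fwid 0x003f0400 with the IOMMU core.
 */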
1534 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1535 {
1536         u32 mask, fwid = 0;
1537
1538         if (args->args_count > 0)
1539                 fwid |= (u16)args->args[0];
1540
1541         if (args->args_count > 1)
1542                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1543         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1544                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1545
1546         return iommu_fwspec_add_ids(dev, &fwid, 1);
1547 }
1548
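/*
 * Report a software-managed MSI region (IOMMU_RESV_SW_MSI) so that the
 * iommu-dma layer has a fixed IOVA window in which to map MSI
 * doorbells, plus whatever other reserved regions the generic code
 * knows about for this device.
 */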
1549 static void arm_smmu_get_resv_regions(struct device *dev,
1550                                       struct list_head *head)
1551 {
1552         struct iommu_resv_region *region;
1553         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1554
1555         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1556                                          prot, IOMMU_RESV_SW_MSI);
1557         if (!region)
1558                 return;
1559
1560         list_add_tail(&region->list, head);
1561
1562         iommu_dma_get_resv_regions(dev, head);
1563 }
1564
1565 static void arm_smmu_put_resv_regions(struct device *dev,
1566                                       struct list_head *head)
1567 {
1568         struct iommu_resv_region *entry, *next;
1569
1570         list_for_each_entry_safe(entry, next, head, list)
1571                 kfree(entry);
1572 }
1573
1574 static struct iommu_ops arm_smmu_ops = {
1575         .capable                = arm_smmu_capable,
1576         .domain_alloc           = arm_smmu_domain_alloc,
1577         .domain_free            = arm_smmu_domain_free,
1578         .attach_dev             = arm_smmu_attach_dev,
1579         .map                    = arm_smmu_map,
1580         .unmap                  = arm_smmu_unmap,
1581         .flush_iotlb_all        = arm_smmu_iotlb_sync,
1582         .iotlb_sync             = arm_smmu_iotlb_sync,
1583         .iova_to_phys           = arm_smmu_iova_to_phys,
1584         .add_device             = arm_smmu_add_device,
1585         .remove_device          = arm_smmu_remove_device,
1586         .device_group           = arm_smmu_device_group,
1587         .domain_get_attr        = arm_smmu_domain_get_attr,
1588         .domain_set_attr        = arm_smmu_domain_set_attr,
1589         .of_xlate               = arm_smmu_of_xlate,
1590         .get_resv_regions       = arm_smmu_get_resv_regions,
1591         .put_resv_regions       = arm_smmu_put_resv_regions,
1592         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1593 };
1594
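/*
 * Put the SMMU into a known state: clear the global and per-context
 * fault status registers, rewrite every stream mapping entry and
 * context bank from the driver's shadow state, invalidate the TLBs,
 * and finally enable client accesses with the chosen bypass/fault
 * behaviour via sCR0.
 */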
1595 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1596 {
1597         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1598         int i;
1599         u32 reg, major;
1600
1601         /* clear global FSR */
1602         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1603         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1604
1605         /*
1606          * Reset stream mapping groups: Initial values mark all SMRn as
1607          * invalid and all S2CRn as bypass unless overridden.
1608          */
1609         for (i = 0; i < smmu->num_mapping_groups; ++i)
1610                 arm_smmu_write_sme(smmu, i);
1611
1612         if (smmu->model == ARM_MMU500) {
1613                 /*
1614                  * Before clearing ARM_MMU500_ACTLR_CPRE, the CACHE_LOCK
1615                  * bit of the ACR must be cleared first. Note that the
1616                  * CACHE_LOCK bit is only present from MMU-500 r2 onwards.
1617                  */
1618                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1619                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1620                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1621                 if (major >= 2)
1622                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1623                 /*
1624                  * Allow unmatched Stream IDs to allocate bypass
1625                  * TLB entries for reduced latency.
1626                  */
1627                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1628                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1629         }
1630
1631         /* Make sure all context banks are disabled and clear CB_FSR */
1632         for (i = 0; i < smmu->num_context_banks; ++i) {
1633                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1634
1635                 arm_smmu_write_context_bank(smmu, i);
1636                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1637                 /*
1638                  * Disable MMU-500's not-particularly-beneficial next-page
1639                  * prefetcher for the sake of errata #841119 and #826419.
1640                  */
1641                 if (smmu->model == ARM_MMU500) {
1642                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1643                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1644                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1645                 }
1646         }
1647
1648         /* Invalidate the TLB, just in case */
1649         writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1650         writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1651
1652         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1653
1654         /* Enable fault reporting */
1655         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1656
1657         /* Disable TLB broadcasting. */
1658         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1659
1660         /* Enable client access, handling unmatched streams as appropriate */
1661         reg &= ~sCR0_CLIENTPD;
1662         if (disable_bypass)
1663                 reg |= sCR0_USFCFG;
1664         else
1665                 reg &= ~sCR0_USFCFG;
1666
1667         /* Disable forced broadcasting */
1668         reg &= ~sCR0_FB;
1669
1670         /* Don't upgrade barriers */
1671         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1672
1673         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1674                 reg |= sCR0_VMID16EN;
1675
1676         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1677                 reg |= sCR0_EXIDENABLE;
1678
1679         /* Push the button */
1680         arm_smmu_tlb_sync_global(smmu);
1681         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1682 }
1683
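/*
 * Decode the address size fields from the ID registers (IAS/OAS/UBS)
 * into a number of address bits.
 */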
1684 static int arm_smmu_id_size_to_bits(int size)
1685 {
1686         switch (size) {
1687         case 0:
1688                 return 32;
1689         case 1:
1690                 return 36;
1691         case 2:
1692                 return 40;
1693         case 3:
1694                 return 42;
1695         case 4:
1696                 return 44;
1697         case 5:
1698         default:
1699                 return 48;
1700         }
1701 }
1702
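/*
 * Read the ID0-ID2 registers to discover what this SMMU implements:
 * translation stages, stream matching resources, context bank counts,
 * address sizes and supported page table formats.
 */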
1703 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1704 {
1705         unsigned long size;
1706         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1707         u32 id;
1708         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1709         int i;
1710
1711         dev_notice(smmu->dev, "probing hardware configuration...\n");
1712         dev_notice(smmu->dev, "SMMUv%d with:\n",
1713                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1714
1715         /* ID0 */
1716         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1717
1718         /* Restrict available stages based on module parameter */
1719         if (force_stage == 1)
1720                 id &= ~(ID0_S2TS | ID0_NTS);
1721         else if (force_stage == 2)
1722                 id &= ~(ID0_S1TS | ID0_NTS);
1723
1724         if (id & ID0_S1TS) {
1725                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1726                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1727         }
1728
1729         if (id & ID0_S2TS) {
1730                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1731                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1732         }
1733
1734         if (id & ID0_NTS) {
1735                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1736                 dev_notice(smmu->dev, "\tnested translation\n");
1737         }
1738
1739         if (!(smmu->features &
1740                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1741                 dev_err(smmu->dev, "\tno translation support!\n");
1742                 return -ENODEV;
1743         }
1744
1745         if ((id & ID0_S1TS) &&
1746                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1747                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1748                 dev_notice(smmu->dev, "\taddress translation ops\n");
1749         }
1750
1751         /*
1752          * In order for DMA API calls to work properly, we must defer to what
1753          * the FW says about coherency, regardless of what the hardware claims.
1754          * Fortunately, this also opens up a workaround for systems where the
1755          * ID register value has ended up configured incorrectly.
1756          */
1757         cttw_reg = !!(id & ID0_CTTW);
1758         if (cttw_fw || cttw_reg)
1759                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1760                            cttw_fw ? "" : "non-");
1761         if (cttw_fw != cttw_reg)
1762                 dev_notice(smmu->dev,
1763                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1764
1765         /* Max. number of entries we have for stream matching/indexing */
1766         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1767                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1768                 size = 1 << 16;
1769         } else {
1770                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1771         }
1772         smmu->streamid_mask = size - 1;
1773         if (id & ID0_SMS) {
1774                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1775                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1776                 if (size == 0) {
1777                         dev_err(smmu->dev,
1778                                 "stream-matching supported, but no SMRs present!\n");
1779                         return -ENODEV;
1780                 }
1781
1782                 /* Zero-initialised to mark as invalid */
1783                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1784                                           GFP_KERNEL);
1785                 if (!smmu->smrs)
1786                         return -ENOMEM;
1787
1788                 dev_notice(smmu->dev,
1789                            "\tstream matching with %lu register groups\n", size);
1790         }
1791         /* s2cr->type == 0 means translation, so initialise explicitly */
1792         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1793                                          GFP_KERNEL);
1794         if (!smmu->s2crs)
1795                 return -ENOMEM;
1796         for (i = 0; i < size; i++)
1797                 smmu->s2crs[i] = s2cr_init_val;
1798
1799         smmu->num_mapping_groups = size;
1800         mutex_init(&smmu->stream_map_mutex);
1801         spin_lock_init(&smmu->global_sync_lock);
1802
1803         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1804                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1805                 if (!(id & ID0_PTFS_NO_AARCH32S))
1806                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1807         }
1808
1809         /* ID1 */
1810         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1811         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1812
1813         /* Check for size mismatch of SMMU address space from mapped region */
1814         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1815         size <<= smmu->pgshift;
1816         if (smmu->cb_base != gr0_base + size)
1817                 dev_warn(smmu->dev,
1818                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1819                         size * 2, (smmu->cb_base - gr0_base) * 2);
1820
1821         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1822         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1823         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1824                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1825                 return -ENODEV;
1826         }
1827         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1828                    smmu->num_context_banks, smmu->num_s2_context_banks);
1829         /*
1830          * Cavium CN88xx erratum #27704.
1831          * Ensure ASID and VMID allocation is unique across all SMMUs in
1832          * the system.
1833          */
1834         if (smmu->model == CAVIUM_SMMUV2) {
1835                 smmu->cavium_id_base =
1836                         atomic_add_return(smmu->num_context_banks,
1837                                           &cavium_smmu_context_count);
1838                 smmu->cavium_id_base -= smmu->num_context_banks;
1839                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1840         }
1841         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1842                                  sizeof(*smmu->cbs), GFP_KERNEL);
1843         if (!smmu->cbs)
1844                 return -ENOMEM;
1845
1846         /* ID2 */
1847         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1848         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1849         smmu->ipa_size = size;
1850
1851         /* The output mask is also applied for bypass */
1852         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1853         smmu->pa_size = size;
1854
1855         if (id & ID2_VMID16)
1856                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1857
1858         /*
1859          * What the page table walker can address actually depends on which
1860          * descriptor format is in use, but since a) we don't know that yet,
1861          * and b) it can vary per context bank, this will have to do...
1862          */
1863         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1864                 dev_warn(smmu->dev,
1865                          "failed to set DMA mask for table walker\n");
1866
1867         if (smmu->version < ARM_SMMU_V2) {
1868                 smmu->va_size = smmu->ipa_size;
1869                 if (smmu->version == ARM_SMMU_V1_64K)
1870                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1871         } else {
1872                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1873                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1874                 if (id & ID2_PTFS_4K)
1875                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1876                 if (id & ID2_PTFS_16K)
1877                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1878                 if (id & ID2_PTFS_64K)
1879                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1880         }
1881
1882         /* Now we've corralled the various formats, what'll it do? */
1883         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1884                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1885         if (smmu->features &
1886             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1887                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1888         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1889                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1890         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1891                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1892
1893         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1894                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1895         else
1896                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1897         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1898                    smmu->pgsize_bitmap);
1899
1900
1901         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1902                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1903                            smmu->va_size, smmu->ipa_size);
1904
1905         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1906                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1907                            smmu->ipa_size, smmu->pa_size);
1908
1909         return 0;
1910 }
1911
1912 struct arm_smmu_match_data {
1913         enum arm_smmu_arch_version version;
1914         enum arm_smmu_implementation model;
1915 };
1916
1917 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1918 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1919
1920 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1921 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1922 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1923 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1924 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1925 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1926
1927 static const struct of_device_id arm_smmu_of_match[] = {
1928         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1929         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1930         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1931         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1932         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1933         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1934         { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1935         { },
1936 };
1937 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1938
1939 #ifdef CONFIG_ACPI
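/* Map the IORT model field onto the driver's version/model enums */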
1940 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1941 {
1942         int ret = 0;
1943
1944         switch (model) {
1945         case ACPI_IORT_SMMU_V1:
1946         case ACPI_IORT_SMMU_CORELINK_MMU400:
1947                 smmu->version = ARM_SMMU_V1;
1948                 smmu->model = GENERIC_SMMU;
1949                 break;
1950         case ACPI_IORT_SMMU_CORELINK_MMU401:
1951                 smmu->version = ARM_SMMU_V1_64K;
1952                 smmu->model = GENERIC_SMMU;
1953                 break;
1954         case ACPI_IORT_SMMU_V2:
1955                 smmu->version = ARM_SMMU_V2;
1956                 smmu->model = GENERIC_SMMU;
1957                 break;
1958         case ACPI_IORT_SMMU_CORELINK_MMU500:
1959                 smmu->version = ARM_SMMU_V2;
1960                 smmu->model = ARM_MMU500;
1961                 break;
1962         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1963                 smmu->version = ARM_SMMU_V2;
1964                 smmu->model = CAVIUM_SMMUV2;
1965                 break;
1966         default:
1967                 ret = -ENODEV;
1968         }
1969
1970         return ret;
1971 }
1972
1973 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1974                                       struct arm_smmu_device *smmu)
1975 {
1976         struct device *dev = smmu->dev;
1977         struct acpi_iort_node *node =
1978                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1979         struct acpi_iort_smmu *iort_smmu;
1980         int ret;
1981
1982         /* Retrieve SMMU1/2 specific data */
1983         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1984
1985         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1986         if (ret < 0)
1987                 return ret;
1988
1989         /* Ignore the configuration access interrupt */
1990         smmu->num_global_irqs = 1;
1991
1992         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1993                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1994
1995         return 0;
1996 }
1997 #else
1998 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1999                                              struct arm_smmu_device *smmu)
2000 {
2001         return -ENODEV;
2002 }
2003 #endif
2004
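/*
 * DT probe: read "#global-interrupts", pick the version/model from the
 * compatible-string match data, and decide whether the deprecated
 * "mmu-masters" binding or the generic binding is in use; mixing the
 * two across SMMUs in one system is not supported.
 */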
2005 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2006                                     struct arm_smmu_device *smmu)
2007 {
2008         const struct arm_smmu_match_data *data;
2009         struct device *dev = &pdev->dev;
2010         bool legacy_binding;
2011
2012         if (of_property_read_u32(dev->of_node, "#global-interrupts",
2013                                  &smmu->num_global_irqs)) {
2014                 dev_err(dev, "missing #global-interrupts property\n");
2015                 return -ENODEV;
2016         }
2017
2018         data = of_device_get_match_data(dev);
2019         smmu->version = data->version;
2020         smmu->model = data->model;
2021
2022         parse_driver_options(smmu);
2023
2024         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2025         if (legacy_binding && !using_generic_binding) {
2026                 if (!using_legacy_binding)
2027                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2028                 using_legacy_binding = true;
2029         } else if (!legacy_binding && !using_legacy_binding) {
2030                 using_generic_binding = true;
2031         } else {
2032                 dev_err(dev, "not probing due to mismatched DT properties\n");
2033                 return -ENODEV;
2034         }
2035
2036         if (of_dma_is_coherent(dev->of_node))
2037                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2038
2039         return 0;
2040 }
2041
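/*
 * Hook arm_smmu_ops up to every bus type we may find masters on
 * (platform, AMBA, PCI) so that devices probed from now on get
 * add_device() callbacks and default domains set up.
 */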
2042 static void arm_smmu_bus_init(void)
2043 {
2044         /* Oh, for a proper bus abstraction */
2045         if (!iommu_present(&platform_bus_type))
2046                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2047 #ifdef CONFIG_ARM_AMBA
2048         if (!iommu_present(&amba_bustype))
2049                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2050 #endif
2051 #ifdef CONFIG_PCI
2052         if (!iommu_present(&pci_bus_type)) {
2053                 pci_request_acs();
2054                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2055         }
2056 #endif
2057 }
2058
2059 static int arm_smmu_device_probe(struct platform_device *pdev)
2060 {
2061         struct resource *res;
2062         resource_size_t ioaddr;
2063         struct arm_smmu_device *smmu;
2064         struct device *dev = &pdev->dev;
2065         int num_irqs, i, err;
2066
2067         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2068         if (!smmu) {
2069                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2070                 return -ENOMEM;
2071         }
2072         smmu->dev = dev;
2073
2074         if (dev->of_node)
2075                 err = arm_smmu_device_dt_probe(pdev, smmu);
2076         else
2077                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2078
2079         if (err)
2080                 return err;
2081
2082         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2083         ioaddr = res->start;
2084         smmu->base = devm_ioremap_resource(dev, res);
2085         if (IS_ERR(smmu->base))
2086                 return PTR_ERR(smmu->base);
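	/*
	 * The context banks occupy the upper half of the SMMU address
	 * space, so point cb_base at the midpoint of the mapped region;
	 * arm_smmu_device_cfg_probe() warns later if the size implied by
	 * ID1 disagrees with this.
	 */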
2087         smmu->cb_base = smmu->base + resource_size(res) / 2;
2088
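	/*
	 * Count the interrupt resources: the first num_global_irqs of
	 * them are global fault interrupts, and anything beyond that is
	 * counted as a context fault interrupt.
	 */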
2089         num_irqs = 0;
2090         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2091                 num_irqs++;
2092                 if (num_irqs > smmu->num_global_irqs)
2093                         smmu->num_context_irqs++;
2094         }
2095
2096         if (!smmu->num_context_irqs) {
2097                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2098                         num_irqs, smmu->num_global_irqs + 1);
2099                 return -ENODEV;
2100         }
2101
2102         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2103                                   GFP_KERNEL);
2104         if (!smmu->irqs) {
2105                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2106                 return -ENOMEM;
2107         }
2108
2109         for (i = 0; i < num_irqs; ++i) {
2110                 int irq = platform_get_irq(pdev, i);
2111
2112                 if (irq < 0) {
2113                         dev_err(dev, "failed to get irq index %d\n", i);
2114                         return -ENODEV;
2115                 }
2116                 smmu->irqs[i] = irq;
2117         }
2118
2119         err = arm_smmu_device_cfg_probe(smmu);
2120         if (err)
2121                 return err;
2122
2123         if (smmu->version == ARM_SMMU_V2) {
2124                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2125                         dev_err(dev,
2126                               "found only %d context irq(s) but %d required\n",
2127                               smmu->num_context_irqs, smmu->num_context_banks);
2128                         return -ENODEV;
2129                 }
2130
2131                 /* Ignore superfluous interrupts */
2132                 smmu->num_context_irqs = smmu->num_context_banks;
2133         }
2134
2135         for (i = 0; i < smmu->num_global_irqs; ++i) {
2136                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2137                                        arm_smmu_global_fault,
2138                                        IRQF_SHARED,
2139                                        "arm-smmu global fault",
2140                                        smmu);
2141                 if (err) {
2142                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2143                                 i, smmu->irqs[i]);
2144                         return err;
2145                 }
2146         }
2147
2148         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2149                                      "smmu.%pa", &ioaddr);
2150         if (err) {
2151                 dev_err(dev, "Failed to register iommu in sysfs\n");
2152                 return err;
2153         }
2154
2155         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2156         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2157
2158         err = iommu_device_register(&smmu->iommu);
2159         if (err) {
2160                 dev_err(dev, "Failed to register iommu\n");
2161                 return err;
2162         }
2163
2164         platform_set_drvdata(pdev, smmu);
2165         arm_smmu_device_reset(smmu);
2166         arm_smmu_test_smr_masks(smmu);
2167
2168         /*
2169          * For ACPI and generic DT bindings, an SMMU will be probed before
2170          * any device which might need it, so we want the bus ops in place
2171          * ready to handle default domain setup as soon as any SMMU exists.
2172          */
2173         if (!using_legacy_binding)
2174                 arm_smmu_bus_init();
2175
2176         return 0;
2177 }
2178
2179 /*
2180  * With the legacy DT binding in play, though, we have no guarantees about
2181  * probe order, but then we're also not doing default domains, so we can
2182  * delay setting bus ops until we're sure every possible SMMU is ready,
2183  * and that way ensure that no add_device() calls get missed.
2184  */
2185 static int arm_smmu_legacy_bus_init(void)
2186 {
2187         if (using_legacy_binding)
2188                 arm_smmu_bus_init();
2189         return 0;
2190 }
2191 device_initcall_sync(arm_smmu_legacy_bus_init);
2192
2193 static int arm_smmu_device_remove(struct platform_device *pdev)
2194 {
2195         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2196
2197         if (!smmu)
2198                 return -ENODEV;
2199
2200         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2201                 dev_err(&pdev->dev, "removing device with active domains!\n");
2202
2203         /* Turn the thing off */
2204         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2205         return 0;
2206 }
2207
2208 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2209 {
2210         arm_smmu_device_remove(pdev);
2211 }
2212
2213 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2214 {
2215         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2216
2217         arm_smmu_device_reset(smmu);
2218         return 0;
2219 }
2220
2221 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2222
2223 static struct platform_driver arm_smmu_driver = {
2224         .driver = {
2225                 .name           = "arm-smmu",
2226                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2227                 .pm             = &arm_smmu_pm_ops,
2228         },
2229         .probe  = arm_smmu_device_probe,
2230         .remove = arm_smmu_device_remove,
2231         .shutdown = arm_smmu_device_shutdown,
2232 };
2233 module_platform_driver(arm_smmu_driver);
2234
2235 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2236 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2237 MODULE_LICENSE("GPL v2");