GNU Linux-libre 4.4.284-gnu1
[releases.git] / drivers / gpu / drm / radeon / r600_cp.c
1 /*
2  * Copyright 2008-2009 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *     Dave Airlie <airlied@redhat.com>
26  *     Alex Deucher <alexander.deucher@amd.com>
27  *
28  * ------------------------ This file is DEPRECATED! -------------------------
29  */
30
31 #include <linux/module.h>
32
33 #include <drm/drmP.h>
34 #include <drm/radeon_drm.h>
35 #include "radeon_drv.h"
36
37 #define PFP_UCODE_SIZE 576
38 #define PM4_UCODE_SIZE 1792
39 #define R700_PFP_UCODE_SIZE 848
40 #define R700_PM4_UCODE_SIZE 1360
41
42 /* Firmware Names */
43 /*(DEBLOBBED)*/
44
45
46 int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
47                         unsigned family, u32 *ib, int *l);
48 void r600_cs_legacy_init(void);
49
50
51 # define ATI_PCIGART_PAGE_SIZE          4096    /**< PCI GART page size */
52 # define ATI_PCIGART_PAGE_MASK          (~(ATI_PCIGART_PAGE_SIZE-1))
53
54 #define R600_PTE_VALID     (1 << 0)
55 #define R600_PTE_SYSTEM    (1 << 1)
56 #define R600_PTE_SNOOPED   (1 << 2)
57 #define R600_PTE_READABLE  (1 << 5)
58 #define R600_PTE_WRITEABLE (1 << 6)
59
60 /* MAX values used for gfx init */
61 #define R6XX_MAX_SH_GPRS           256
62 #define R6XX_MAX_TEMP_GPRS         16
63 #define R6XX_MAX_SH_THREADS        256
64 #define R6XX_MAX_SH_STACK_ENTRIES  4096
65 #define R6XX_MAX_BACKENDS          8
66 #define R6XX_MAX_BACKENDS_MASK     0xff
67 #define R6XX_MAX_SIMDS             8
68 #define R6XX_MAX_SIMDS_MASK        0xff
69 #define R6XX_MAX_PIPES             8
70 #define R6XX_MAX_PIPES_MASK        0xff
71
72 #define R7XX_MAX_SH_GPRS           256
73 #define R7XX_MAX_TEMP_GPRS         16
74 #define R7XX_MAX_SH_THREADS        256
75 #define R7XX_MAX_SH_STACK_ENTRIES  4096
76 #define R7XX_MAX_BACKENDS          8
77 #define R7XX_MAX_BACKENDS_MASK     0xff
78 #define R7XX_MAX_SIMDS             16
79 #define R7XX_MAX_SIMDS_MASK        0xffff
80 #define R7XX_MAX_PIPES             8
81 #define R7XX_MAX_PIPES_MASK        0xff
82
83 static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries)
84 {
85         int i;
86
87         dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
88
89         for (i = 0; i < dev_priv->usec_timeout; i++) {
90                 int slots;
91                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
92                         slots = (RADEON_READ(R600_GRBM_STATUS)
93                                  & R700_CMDFIFO_AVAIL_MASK);
94                 else
95                         slots = (RADEON_READ(R600_GRBM_STATUS)
96                                  & R600_CMDFIFO_AVAIL_MASK);
97                 if (slots >= entries)
98                         return 0;
99                 DRM_UDELAY(1);
100         }
101         DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
102                  RADEON_READ(R600_GRBM_STATUS),
103                  RADEON_READ(R600_GRBM_STATUS2));
104
105         return -EBUSY;
106 }
107
108 static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv)
109 {
110         int i, ret;
111
112         dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
113
114         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
115                 ret = r600_do_wait_for_fifo(dev_priv, 8);
116         else
117                 ret = r600_do_wait_for_fifo(dev_priv, 16);
118         if (ret)
119                 return ret;
120         for (i = 0; i < dev_priv->usec_timeout; i++) {
121                 if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE))
122                         return 0;
123                 DRM_UDELAY(1);
124         }
125         DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
126                  RADEON_READ(R600_GRBM_STATUS),
127                  RADEON_READ(R600_GRBM_STATUS2));
128
129         return -EBUSY;
130 }
131
132 void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info)
133 {
134         struct drm_sg_mem *entry = dev->sg;
135         int max_pages;
136         int pages;
137         int i;
138
139         if (!entry)
140                 return;
141
142         if (gart_info->bus_addr) {
143                 max_pages = (gart_info->table_size / sizeof(u64));
144                 pages = (entry->pages <= max_pages)
145                   ? entry->pages : max_pages;
146
147                 for (i = 0; i < pages; i++) {
148                         if (!entry->busaddr[i])
149                                 break;
150                         pci_unmap_page(dev->pdev, entry->busaddr[i],
151                                        PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
152                 }
153                 if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)
154                         gart_info->bus_addr = 0;
155         }
156 }
157
/*
 * R600 has page table setup.
 *
 * Build the flat GART page table: DMA-map every scatter/gather page and
 * write one 64-bit PTE per ATI_PCIGART_PAGE_SIZE chunk of each mapped
 * system page into the (I/O-mapped) table.
 *
 * Returns 1 on success, 0 on mapping failure -- note this is NOT the
 * usual 0/-errno kernel convention; callers test for truthiness.
 */
int r600_page_table_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;
	struct drm_local_map *map = &gart_info->mapping;
	struct drm_sg_mem *entry = dev->sg;
	int ret = 0;
	int i, j;
	int pages;
	u64 page_base;
	dma_addr_t entry_addr;
	int max_ati_pages, max_real_pages, gart_idx;

	/* okay page table is available - lets rock */

	/* PTEs are u64; one system page covers several GART pages */
	max_ati_pages = (gart_info->table_size / sizeof(u64));
	max_real_pages = max_ati_pages / (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE);

	/* clamp to what the table can describe */
	pages = (entry->pages <= max_real_pages) ?
		entry->pages : max_real_pages;

	/* the table lives in I/O memory, hence memset_io */
	memset_io((void __iomem *)map->handle, 0, max_ati_pages * sizeof(u64));

	gart_idx = 0;
	for (i = 0; i < pages; i++) {
		entry->busaddr[i] = pci_map_page(dev->pdev,
						 entry->pagelist[i], 0,
						 PAGE_SIZE,
						 PCI_DMA_BIDIRECTIONAL);
		if (pci_dma_mapping_error(dev->pdev, entry->busaddr[i])) {
			DRM_ERROR("unable to map PCIGART pages!\n");
			/* undo every mapping made so far */
			r600_page_table_cleanup(dev, gart_info);
			goto done;
		}
		entry_addr = entry->busaddr[i];
		/* emit one PTE per GART-page-sized chunk of this page */
		for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
			page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK;
			page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
			page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE;

			DRM_WRITE64(map, gart_idx * sizeof(u64), page_base);

			gart_idx++;

			if ((i % 128) == 0)
				DRM_DEBUG("page entry %d: 0x%016llx\n",
				    i, (unsigned long long)page_base);
			entry_addr += ATI_PCIGART_PAGE_SIZE;
		}
	}
	ret = 1;
done:
	return ret;
}
212
/*
 * Request a VM context-0 invalidation covering the whole GART aperture
 * and busy-wait (up to ~1000 us) for the response bits to appear.
 * Addresses are programmed in 4KB-page units (>> 12).
 */
static void r600_vm_flush_gart_range(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 resp, countdown = 1000;
	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2);

	/* poll for a response; bail out after the countdown regardless */
	do {
		resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE);
		countdown--;
		DRM_UDELAY(1);
	} while (((resp & 0xf0) == 0) && countdown);
}
227
/*
 * Program the R6xx VM/MC so that context 0 translates the GART aperture
 * through the flat page table built by r600_page_table_init(), then
 * flush the configured range.  Register order follows the hardware
 * programming sequence and should not be rearranged.
 */
static void r600_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_rd_a;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	/* setup MC RD a */
	mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS |
		R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) |
		R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY;

	/* the same L1 TLB configuration is applied to every MC client */
	RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a);

	/* HDP reads additionally require strict ordering; writes do not */
	RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING);
	RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/);

	RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE);
	RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a);

	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) |
		       R600_VM_L2_CNTL3_BANK_SELECT_1(1) |
		       R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2));
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* context 0 uses a single-level (flat) page table */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* table base and range, all in 4KB-page units (>> 12) */
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	r600_vm_flush_gart_range(dev);
}
292
/*
 * Fetch and validate the CP microcode (PFP + ME) for the detected ASIC.
 *
 * Registers a transient platform device so the firmware loader has a
 * struct device to attach to, requests both images (names are removed
 * in this deblobbed tree), verifies their exact expected byte sizes,
 * and on any failure releases both images and leaves pfp_fw/me_fw NULL.
 *
 * Returns 0 on success or a negative errno.
 */
static int r600_cp_init_microcode(drm_radeon_private_t *dev_priv)
{
	struct platform_device *pdev;
	const char *chip_name;
	size_t pfp_req_size, me_req_size;
	char fw_name[30];
	int err;

	pdev = platform_device_register_simple("r600_cp", 0, NULL, 0);
	/* IS_ERR() result is only used as a boolean failure flag here */
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "r600_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	/* map the ASIC family to its firmware image base name */
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_R600:  chip_name = "R600";  break;
	case CHIP_RV610: chip_name = "RV610"; break;
	case CHIP_RV630: chip_name = "RV630"; break;
	case CHIP_RV620: chip_name = "RV620"; break;
	case CHIP_RV635: chip_name = "RV635"; break;
	case CHIP_RV670: chip_name = "RV670"; break;
	case CHIP_RS780:
	case CHIP_RS880: chip_name = "RS780"; break;
	case CHIP_RV770: chip_name = "RV770"; break;
	case CHIP_RV730:
	case CHIP_RV740: chip_name = "RV730"; break;
	case CHIP_RV710: chip_name = "RV710"; break;
	default:         BUG();
	}

	/* expected image sizes in bytes (ucode sizes are in dwords) */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
		pfp_req_size = R700_PFP_UCODE_SIZE * 4;
		me_req_size = R700_PM4_UCODE_SIZE * 4;
	} else {
		pfp_req_size = PFP_UCODE_SIZE * 4;
		me_req_size = PM4_UCODE_SIZE * 12;
	}

	DRM_INFO("Loading %s CP Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
	err = reject_firmware(&dev_priv->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (dev_priv->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
		       dev_priv->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
	err = reject_firmware(&dev_priv->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (dev_priv->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
		       dev_priv->me_fw->size, fw_name);
		err = -EINVAL;
	}
out:
	/* device was only needed while the loader was active */
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "r600_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		/* release both images; release_firmware(NULL) is a no-op */
		release_firmware(dev_priv->pfp_fw);
		dev_priv->pfp_fw = NULL;
		release_firmware(dev_priv->me_fw);
		dev_priv->me_fw = NULL;
	}
	return err;
}
371
/*
 * Upload the R6xx CP microcode: halt the CP, pulse a CP soft reset,
 * then write the ME image into CP_ME_RAM and the PFP image into the
 * PFP ucode RAM.  Silently returns if either image was never loaded.
 */
static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
	const __be32 *fw_data;
	int i;

	if (!dev_priv->me_fw || !dev_priv->pfp_fw)
		return;

	r600_do_cp_stop(dev_priv);

	/* keep the ring quiescent while loading (no rptr writeback) */
	RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
		     R600_BUF_SWAP_32BIT |
#endif
		     R600_RB_NO_UPDATE |
		     R600_RB_BLKSZ(15) |
		     R600_RB_BUFSZ(3));

	/* pulse CP soft reset; the read-back flushes the posted write */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	mdelay(15);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

	/* firmware images are stored big-endian */
	fw_data = (const __be32 *)dev_priv->me_fw->data;
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	/* PM4_UCODE_SIZE * 3 dwords matches me_req_size / 4 for R6xx */
	for (i = 0; i < PM4_UCODE_SIZE * 3; i++)
		RADEON_WRITE(R600_CP_ME_RAM_DATA,
			     be32_to_cpup(fw_data++));

	fw_data = (const __be32 *)dev_priv->pfp_fw->data;
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < PFP_UCODE_SIZE; i++)
		RADEON_WRITE(R600_CP_PFP_UCODE_DATA,
			     be32_to_cpup(fw_data++));

	/* rewind address/read/write pointers before the CP is started */
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);

}
412
/*
 * Program the R7xx VM/MC for context 0 (RV770+ uses different MC TLB
 * client registers than R6xx), pointing it at the flat GART page table,
 * then flush the configured range.  Register order follows the hardware
 * programming sequence and should not be rearranged.
 */
static void r700_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_vm_md_l1;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	/* shared L1 TLB configuration for all MD/MB clients below */
	mc_vm_md_l1 = R700_ENABLE_L1_TLB |
	    R700_ENABLE_L1_FRAGMENT_PROCESSING |
	    R700_SYSTEM_ACCESS_MODE_IN_SYS |
	    R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
	    R700_EFFECTIVE_L1_TLB_SIZE(5) |
	    R700_EFFECTIVE_L1_QUEUE_SIZE(5);

	RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1);

	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* context 0 uses a single-level (flat) page table */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* table base and range, all in 4KB-page units (>> 12) */
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	r600_vm_flush_gart_range(dev);
}
464
/*
 * Upload the R7xx CP microcode: halt the CP, pulse a CP soft reset,
 * then write the PFP image followed by the ME image (note: reverse
 * order vs. r600_cp_load_microcode).  Silently returns if either
 * image was never loaded.
 */
static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
	const __be32 *fw_data;
	int i;

	if (!dev_priv->me_fw || !dev_priv->pfp_fw)
		return;

	r600_do_cp_stop(dev_priv);

	/* keep the ring quiescent while loading (no rptr writeback) */
	RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
		     R600_BUF_SWAP_32BIT |
#endif
		     R600_RB_NO_UPDATE |
		     R600_RB_BLKSZ(15) |
		     R600_RB_BUFSZ(3));

	/* pulse CP soft reset; the read-back flushes the posted write */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	mdelay(15);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

	/* firmware images are stored big-endian */
	fw_data = (const __be32 *)dev_priv->pfp_fw->data;
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)dev_priv->me_fw->data;
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
		RADEON_WRITE(R600_CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);

	/* rewind address/read/write pointers before the CP is started */
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);

}
505
/*
 * Probe whether scratch-register writeback to the ring read-pointer
 * area works on this board.  Sets dev_priv->writeback_works and, if
 * writeback is unusable (or forced off via the radeon_no_wb option),
 * disables it at the CP to avoid pointless bus-master traffic.
 */
static void r600_test_writeback(drm_radeon_private_t *dev_priv)
{
	u32 tmp;

	/* Start with assuming that writeback doesn't work */
	dev_priv->writeback_works = 0;

	/* Writeback doesn't seem to work everywhere, test it here and possibly
	 * enable it if it appears to work
	 */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);

	/* writing SCRATCH_REG1 should propagate to SCRATCHOFF(1) via WB */
	RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef);

	for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) {
		u32 val;

		val = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1));
		if (val == 0xdeadbeef)
			break;
		DRM_UDELAY(1);
	}

	/* tmp < timeout means the loop broke out, i.e. the value arrived */
	if (tmp < dev_priv->usec_timeout) {
		dev_priv->writeback_works = 1;
		DRM_INFO("writeback test succeeded in %d usecs\n", tmp);
	} else {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback test failed\n");
	}
	/* module parameter overrides a successful probe */
	if (radeon_no_wb == 1) {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback forced off\n");
	}

	if (!dev_priv->writeback_works) {
		/* Disable writeback to avoid unnecessary bus master transfer */
		RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
			     R600_BUF_SWAP_32BIT |
#endif
			     RADEON_READ(R600_CP_RB_CNTL) |
			     R600_RB_NO_UPDATE);
		RADEON_WRITE(R600_SCRATCH_UMSK, 0);
	}
}
552
/*
 * Hard-reset the graphics engine via a full GRBM soft reset while
 * preserving the CP ring write pointer and ME control state, then
 * reset the CP ring bookkeeping and the buffer freelist.
 *
 * Always returns 0.
 */
int r600_do_engine_reset(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 cp_ptr, cp_me_cntl, cp_rb_cntl;

	DRM_INFO("Resetting GPU\n");

	/* save state that the soft reset will clobber */
	cp_ptr = RADEON_READ(R600_CP_RB_WPTR);
	cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL);
	RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT);

	/* assert soft reset on all GRBM blocks, then release it */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(50);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
	RADEON_READ(R600_GRBM_SOFT_RESET);

	/* restore the ring pointers via the RPTR write-enable window */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
	cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);
	RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
		     R600_BUF_SWAP_32BIT |
#endif
		     R600_RB_RPTR_WR_ENA);

	RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl);
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl);

	/* Reset the CP ring */
	r600_do_cp_reset(dev_priv);

	/* The CP is no longer running after an engine reset */
	dev_priv->cp_running = 0;

	/* Reset any pending vertex, indirect buffers */
	radeon_freelist_reset(dev);

	return 0;

}
595
596 static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
597                                              u32 num_backends,
598                                              u32 backend_disable_mask)
599 {
600         u32 backend_map = 0;
601         u32 enabled_backends_mask;
602         u32 enabled_backends_count;
603         u32 cur_pipe;
604         u32 swizzle_pipe[R6XX_MAX_PIPES];
605         u32 cur_backend;
606         u32 i;
607
608         if (num_tile_pipes > R6XX_MAX_PIPES)
609                 num_tile_pipes = R6XX_MAX_PIPES;
610         if (num_tile_pipes < 1)
611                 num_tile_pipes = 1;
612         if (num_backends > R6XX_MAX_BACKENDS)
613                 num_backends = R6XX_MAX_BACKENDS;
614         if (num_backends < 1)
615                 num_backends = 1;
616
617         enabled_backends_mask = 0;
618         enabled_backends_count = 0;
619         for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {
620                 if (((backend_disable_mask >> i) & 1) == 0) {
621                         enabled_backends_mask |= (1 << i);
622                         ++enabled_backends_count;
623                 }
624                 if (enabled_backends_count == num_backends)
625                         break;
626         }
627
628         if (enabled_backends_count == 0) {
629                 enabled_backends_mask = 1;
630                 enabled_backends_count = 1;
631         }
632
633         if (enabled_backends_count != num_backends)
634                 num_backends = enabled_backends_count;
635
636         memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);
637         switch (num_tile_pipes) {
638         case 1:
639                 swizzle_pipe[0] = 0;
640                 break;
641         case 2:
642                 swizzle_pipe[0] = 0;
643                 swizzle_pipe[1] = 1;
644                 break;
645         case 3:
646                 swizzle_pipe[0] = 0;
647                 swizzle_pipe[1] = 1;
648                 swizzle_pipe[2] = 2;
649                 break;
650         case 4:
651                 swizzle_pipe[0] = 0;
652                 swizzle_pipe[1] = 1;
653                 swizzle_pipe[2] = 2;
654                 swizzle_pipe[3] = 3;
655                 break;
656         case 5:
657                 swizzle_pipe[0] = 0;
658                 swizzle_pipe[1] = 1;
659                 swizzle_pipe[2] = 2;
660                 swizzle_pipe[3] = 3;
661                 swizzle_pipe[4] = 4;
662                 break;
663         case 6:
664                 swizzle_pipe[0] = 0;
665                 swizzle_pipe[1] = 2;
666                 swizzle_pipe[2] = 4;
667                 swizzle_pipe[3] = 5;
668                 swizzle_pipe[4] = 1;
669                 swizzle_pipe[5] = 3;
670                 break;
671         case 7:
672                 swizzle_pipe[0] = 0;
673                 swizzle_pipe[1] = 2;
674                 swizzle_pipe[2] = 4;
675                 swizzle_pipe[3] = 6;
676                 swizzle_pipe[4] = 1;
677                 swizzle_pipe[5] = 3;
678                 swizzle_pipe[6] = 5;
679                 break;
680         case 8:
681                 swizzle_pipe[0] = 0;
682                 swizzle_pipe[1] = 2;
683                 swizzle_pipe[2] = 4;
684                 swizzle_pipe[3] = 6;
685                 swizzle_pipe[4] = 1;
686                 swizzle_pipe[5] = 3;
687                 swizzle_pipe[6] = 5;
688                 swizzle_pipe[7] = 7;
689                 break;
690         }
691
692         cur_backend = 0;
693         for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
694                 while (((1 << cur_backend) & enabled_backends_mask) == 0)
695                         cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
696
697                 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
698
699                 cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
700         }
701
702         return backend_map;
703 }
704
/* Count the set bits in a pipe/backend enable mask (popcount). */
static int r600_count_pipe_bits(uint32_t val)
{
	return hweight32(val);
}
709
710 static void r600_gfx_init(struct drm_device *dev,
711                           drm_radeon_private_t *dev_priv)
712 {
713         int i, j, num_qd_pipes;
714         u32 sx_debug_1;
715         u32 tc_cntl;
716         u32 arb_pop;
717         u32 num_gs_verts_per_thread;
718         u32 vgt_gs_per_es;
719         u32 gs_prim_buffer_depth = 0;
720         u32 sq_ms_fifo_sizes;
721         u32 sq_config;
722         u32 sq_gpr_resource_mgmt_1 = 0;
723         u32 sq_gpr_resource_mgmt_2 = 0;
724         u32 sq_thread_resource_mgmt = 0;
725         u32 sq_stack_resource_mgmt_1 = 0;
726         u32 sq_stack_resource_mgmt_2 = 0;
727         u32 hdp_host_path_cntl;
728         u32 backend_map;
729         u32 gb_tiling_config = 0;
730         u32 cc_rb_backend_disable;
731         u32 cc_gc_shader_pipe_config;
732         u32 ramcfg;
733
734         /* setup chip specs */
735         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
736         case CHIP_R600:
737                 dev_priv->r600_max_pipes = 4;
738                 dev_priv->r600_max_tile_pipes = 8;
739                 dev_priv->r600_max_simds = 4;
740                 dev_priv->r600_max_backends = 4;
741                 dev_priv->r600_max_gprs = 256;
742                 dev_priv->r600_max_threads = 192;
743                 dev_priv->r600_max_stack_entries = 256;
744                 dev_priv->r600_max_hw_contexts = 8;
745                 dev_priv->r600_max_gs_threads = 16;
746                 dev_priv->r600_sx_max_export_size = 128;
747                 dev_priv->r600_sx_max_export_pos_size = 16;
748                 dev_priv->r600_sx_max_export_smx_size = 128;
749                 dev_priv->r600_sq_num_cf_insts = 2;
750                 break;
751         case CHIP_RV630:
752         case CHIP_RV635:
753                 dev_priv->r600_max_pipes = 2;
754                 dev_priv->r600_max_tile_pipes = 2;
755                 dev_priv->r600_max_simds = 3;
756                 dev_priv->r600_max_backends = 1;
757                 dev_priv->r600_max_gprs = 128;
758                 dev_priv->r600_max_threads = 192;
759                 dev_priv->r600_max_stack_entries = 128;
760                 dev_priv->r600_max_hw_contexts = 8;
761                 dev_priv->r600_max_gs_threads = 4;
762                 dev_priv->r600_sx_max_export_size = 128;
763                 dev_priv->r600_sx_max_export_pos_size = 16;
764                 dev_priv->r600_sx_max_export_smx_size = 128;
765                 dev_priv->r600_sq_num_cf_insts = 2;
766                 break;
767         case CHIP_RV610:
768         case CHIP_RS780:
769         case CHIP_RS880:
770         case CHIP_RV620:
771                 dev_priv->r600_max_pipes = 1;
772                 dev_priv->r600_max_tile_pipes = 1;
773                 dev_priv->r600_max_simds = 2;
774                 dev_priv->r600_max_backends = 1;
775                 dev_priv->r600_max_gprs = 128;
776                 dev_priv->r600_max_threads = 192;
777                 dev_priv->r600_max_stack_entries = 128;
778                 dev_priv->r600_max_hw_contexts = 4;
779                 dev_priv->r600_max_gs_threads = 4;
780                 dev_priv->r600_sx_max_export_size = 128;
781                 dev_priv->r600_sx_max_export_pos_size = 16;
782                 dev_priv->r600_sx_max_export_smx_size = 128;
783                 dev_priv->r600_sq_num_cf_insts = 1;
784                 break;
785         case CHIP_RV670:
786                 dev_priv->r600_max_pipes = 4;
787                 dev_priv->r600_max_tile_pipes = 4;
788                 dev_priv->r600_max_simds = 4;
789                 dev_priv->r600_max_backends = 4;
790                 dev_priv->r600_max_gprs = 192;
791                 dev_priv->r600_max_threads = 192;
792                 dev_priv->r600_max_stack_entries = 256;
793                 dev_priv->r600_max_hw_contexts = 8;
794                 dev_priv->r600_max_gs_threads = 16;
795                 dev_priv->r600_sx_max_export_size = 128;
796                 dev_priv->r600_sx_max_export_pos_size = 16;
797                 dev_priv->r600_sx_max_export_smx_size = 128;
798                 dev_priv->r600_sq_num_cf_insts = 2;
799                 break;
800         default:
801                 break;
802         }
803
804         /* Initialize HDP */
805         j = 0;
806         for (i = 0; i < 32; i++) {
807                 RADEON_WRITE((0x2c14 + j), 0x00000000);
808                 RADEON_WRITE((0x2c18 + j), 0x00000000);
809                 RADEON_WRITE((0x2c1c + j), 0x00000000);
810                 RADEON_WRITE((0x2c20 + j), 0x00000000);
811                 RADEON_WRITE((0x2c24 + j), 0x00000000);
812                 j += 0x18;
813         }
814
815         RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
816
817         /* setup tiling, simd, pipe config */
818         ramcfg = RADEON_READ(R600_RAMCFG);
819
820         switch (dev_priv->r600_max_tile_pipes) {
821         case 1:
822                 gb_tiling_config |= R600_PIPE_TILING(0);
823                 break;
824         case 2:
825                 gb_tiling_config |= R600_PIPE_TILING(1);
826                 break;
827         case 4:
828                 gb_tiling_config |= R600_PIPE_TILING(2);
829                 break;
830         case 8:
831                 gb_tiling_config |= R600_PIPE_TILING(3);
832                 break;
833         default:
834                 break;
835         }
836
837         gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK);
838
839         gb_tiling_config |= R600_GROUP_SIZE(0);
840
841         if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) {
842                 gb_tiling_config |= R600_ROW_TILING(3);
843                 gb_tiling_config |= R600_SAMPLE_SPLIT(3);
844         } else {
845                 gb_tiling_config |=
846                         R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
847                 gb_tiling_config |=
848                         R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
849         }
850
851         gb_tiling_config |= R600_BANK_SWAPS(1);
852
853         cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
854         cc_rb_backend_disable |=
855                 R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
856
857         cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
858         cc_gc_shader_pipe_config |=
859                 R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);
860         cc_gc_shader_pipe_config |=
861                 R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);
862
863         backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
864                                                         (R6XX_MAX_BACKENDS -
865                                                          r600_count_pipe_bits((cc_rb_backend_disable &
866                                                                                R6XX_MAX_BACKENDS_MASK) >> 16)),
867                                                         (cc_rb_backend_disable >> 16));
868         gb_tiling_config |= R600_BACKEND_MAP(backend_map);
869
870         RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
871         RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
872         RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
873         if (gb_tiling_config & 0xc0) {
874                 dev_priv->r600_group_size = 512;
875         } else {
876                 dev_priv->r600_group_size = 256;
877         }
878         dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
879         if (gb_tiling_config & 0x30) {
880                 dev_priv->r600_nbanks = 8;
881         } else {
882                 dev_priv->r600_nbanks = 4;
883         }
884
885         RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
886         RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
887         RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
888
889         num_qd_pipes =
890                 R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
891         RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
892         RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
893
894         /* set HW defaults for 3D engine */
895         RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
896                                                 R600_ROQ_IB2_START(0x2b)));
897
898         RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) |
899                                               R600_ROQ_END(0x40)));
900
901         RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
902                                         R600_SYNC_GRADIENT |
903                                         R600_SYNC_WALKER |
904                                         R600_SYNC_ALIGNER));
905
906         if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)
907                 RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021);
908
909         sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1);
910         sx_debug_1 |= R600_SMX_EVENT_RELEASE;
911         if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600))
912                 sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS;
913         RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1);
914
915         if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
916             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
917             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
918             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
919             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
920             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
921                 RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);
922         else
923                 RADEON_WRITE(R600_DB_DEBUG, 0);
924
925         RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) |
926                                           R600_DEPTH_FLUSH(16) |
927                                           R600_DEPTH_PENDING_FREE(4) |
928                                           R600_DEPTH_CACHELINE_FREE(16)));
929         RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
930         RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0);
931
932         RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
933         RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0));
934
935         sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);
936         if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
937             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
938             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
939             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
940                 sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |
941                                     R600_FETCH_FIFO_HIWATER(0xa) |
942                                     R600_DONE_FIFO_HIWATER(0xe0) |
943                                     R600_ALU_UPDATE_FIFO_HIWATER(0x8));
944         } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
945                    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) {
946                 sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff);
947                 sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4);
948         }
949         RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
950
951         /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
952          * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
953          */
954         sq_config = RADEON_READ(R600_SQ_CONFIG);
955         sq_config &= ~(R600_PS_PRIO(3) |
956                        R600_VS_PRIO(3) |
957                        R600_GS_PRIO(3) |
958                        R600_ES_PRIO(3));
959         sq_config |= (R600_DX9_CONSTS |
960                       R600_VC_ENABLE |
961                       R600_PS_PRIO(0) |
962                       R600_VS_PRIO(1) |
963                       R600_GS_PRIO(2) |
964                       R600_ES_PRIO(3));
965
966         if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) {
967                 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) |
968                                           R600_NUM_VS_GPRS(124) |
969                                           R600_NUM_CLAUSE_TEMP_GPRS(4));
970                 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) |
971                                           R600_NUM_ES_GPRS(0));
972                 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) |
973                                            R600_NUM_VS_THREADS(48) |
974                                            R600_NUM_GS_THREADS(4) |
975                                            R600_NUM_ES_THREADS(4));
976                 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) |
977                                             R600_NUM_VS_STACK_ENTRIES(128));
978                 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) |
979                                             R600_NUM_ES_STACK_ENTRIES(0));
980         } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
981                    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
982                    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
983                    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
984                 /* no vertex cache */
985                 sq_config &= ~R600_VC_ENABLE;
986
987                 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
988                                           R600_NUM_VS_GPRS(44) |
989                                           R600_NUM_CLAUSE_TEMP_GPRS(2));
990                 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
991                                           R600_NUM_ES_GPRS(17));
992                 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
993                                            R600_NUM_VS_THREADS(78) |
994                                            R600_NUM_GS_THREADS(4) |
995                                            R600_NUM_ES_THREADS(31));
996                 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
997                                             R600_NUM_VS_STACK_ENTRIES(40));
998                 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
999                                             R600_NUM_ES_STACK_ENTRIES(16));
1000         } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
1001                    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) {
1002                 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
1003                                           R600_NUM_VS_GPRS(44) |
1004                                           R600_NUM_CLAUSE_TEMP_GPRS(2));
1005                 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) |
1006                                           R600_NUM_ES_GPRS(18));
1007                 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
1008                                            R600_NUM_VS_THREADS(78) |
1009                                            R600_NUM_GS_THREADS(4) |
1010                                            R600_NUM_ES_THREADS(31));
1011                 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
1012                                             R600_NUM_VS_STACK_ENTRIES(40));
1013                 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
1014                                             R600_NUM_ES_STACK_ENTRIES(16));
1015         } else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) {
1016                 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
1017                                           R600_NUM_VS_GPRS(44) |
1018                                           R600_NUM_CLAUSE_TEMP_GPRS(2));
1019                 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
1020                                           R600_NUM_ES_GPRS(17));
1021                 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
1022                                            R600_NUM_VS_THREADS(78) |
1023                                            R600_NUM_GS_THREADS(4) |
1024                                            R600_NUM_ES_THREADS(31));
1025                 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) |
1026                                             R600_NUM_VS_STACK_ENTRIES(64));
1027                 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) |
1028                                             R600_NUM_ES_STACK_ENTRIES(64));
1029         }
1030
1031         RADEON_WRITE(R600_SQ_CONFIG, sq_config);
1032         RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
1033         RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
1034         RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
1035         RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
1036         RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
1037
1038         if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
1039             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
1040             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
1041             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
1042                 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));
1043         else
1044                 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));
1045
1046         RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) |
1047                                                     R600_S0_Y(0x4) |
1048                                                     R600_S1_X(0x4) |
1049                                                     R600_S1_Y(0xc)));
1050         RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) |
1051                                                     R600_S0_Y(0xe) |
1052                                                     R600_S1_X(0x2) |
1053                                                     R600_S1_Y(0x2) |
1054                                                     R600_S2_X(0xa) |
1055                                                     R600_S2_Y(0x6) |
1056                                                     R600_S3_X(0x6) |
1057                                                     R600_S3_Y(0xa)));
1058         RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) |
1059                                                         R600_S0_Y(0xb) |
1060                                                         R600_S1_X(0x4) |
1061                                                         R600_S1_Y(0xc) |
1062                                                         R600_S2_X(0x1) |
1063                                                         R600_S2_Y(0x6) |
1064                                                         R600_S3_X(0xa) |
1065                                                         R600_S3_Y(0xe)));
1066         RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) |
1067                                                         R600_S4_Y(0x1) |
1068                                                         R600_S5_X(0x0) |
1069                                                         R600_S5_Y(0x0) |
1070                                                         R600_S6_X(0xb) |
1071                                                         R600_S6_Y(0x4) |
1072                                                         R600_S7_X(0x7) |
1073                                                         R600_S7_Y(0x8)));
1074
1075
1076         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1077         case CHIP_R600:
1078         case CHIP_RV630:
1079         case CHIP_RV635:
1080                 gs_prim_buffer_depth = 0;
1081                 break;
1082         case CHIP_RV610:
1083         case CHIP_RS780:
1084         case CHIP_RS880:
1085         case CHIP_RV620:
1086                 gs_prim_buffer_depth = 32;
1087                 break;
1088         case CHIP_RV670:
1089                 gs_prim_buffer_depth = 128;
1090                 break;
1091         default:
1092                 break;
1093         }
1094
1095         num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
1096         vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
1097         /* Max value for this is 256 */
1098         if (vgt_gs_per_es > 256)
1099                 vgt_gs_per_es = 256;
1100
1101         RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
1102         RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
1103         RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
1104         RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
1105
1106         /* more default values. 2D/3D driver should adjust as needed */
1107         RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
1108         RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
1109         RADEON_WRITE(R600_SX_MISC, 0);
1110         RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
1111         RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
1112         RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
1113         RADEON_WRITE(R600_SPI_INPUT_Z, 0);
1114         RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
1115         RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
1116
1117         /* clear render buffer base addresses */
1118         RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
1119         RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
1120         RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
1121         RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
1122         RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
1123         RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
1124         RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
1125         RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
1126
1127         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1128         case CHIP_RV610:
1129         case CHIP_RS780:
1130         case CHIP_RS880:
1131         case CHIP_RV620:
1132                 tc_cntl = R600_TC_L2_SIZE(8);
1133                 break;
1134         case CHIP_RV630:
1135         case CHIP_RV635:
1136                 tc_cntl = R600_TC_L2_SIZE(4);
1137                 break;
1138         case CHIP_R600:
1139                 tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT;
1140                 break;
1141         default:
1142                 tc_cntl = R600_TC_L2_SIZE(0);
1143                 break;
1144         }
1145
1146         RADEON_WRITE(R600_TC_CNTL, tc_cntl);
1147
1148         hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
1149         RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1150
1151         arb_pop = RADEON_READ(R600_ARB_POP);
1152         arb_pop |= R600_ENABLE_TC128;
1153         RADEON_WRITE(R600_ARB_POP, arb_pop);
1154
1155         RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1156         RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
1157                                           R600_NUM_CLIP_SEQ(3)));
1158         RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095));
1159
1160 }
1161
1162 static u32 r700_get_tile_pipe_to_backend_map(drm_radeon_private_t *dev_priv,
1163                                              u32 num_tile_pipes,
1164                                              u32 num_backends,
1165                                              u32 backend_disable_mask)
1166 {
1167         u32 backend_map = 0;
1168         u32 enabled_backends_mask;
1169         u32 enabled_backends_count;
1170         u32 cur_pipe;
1171         u32 swizzle_pipe[R7XX_MAX_PIPES];
1172         u32 cur_backend;
1173         u32 i;
1174         bool force_no_swizzle;
1175
1176         if (num_tile_pipes > R7XX_MAX_PIPES)
1177                 num_tile_pipes = R7XX_MAX_PIPES;
1178         if (num_tile_pipes < 1)
1179                 num_tile_pipes = 1;
1180         if (num_backends > R7XX_MAX_BACKENDS)
1181                 num_backends = R7XX_MAX_BACKENDS;
1182         if (num_backends < 1)
1183                 num_backends = 1;
1184
1185         enabled_backends_mask = 0;
1186         enabled_backends_count = 0;
1187         for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
1188                 if (((backend_disable_mask >> i) & 1) == 0) {
1189                         enabled_backends_mask |= (1 << i);
1190                         ++enabled_backends_count;
1191                 }
1192                 if (enabled_backends_count == num_backends)
1193                         break;
1194         }
1195
1196         if (enabled_backends_count == 0) {
1197                 enabled_backends_mask = 1;
1198                 enabled_backends_count = 1;
1199         }
1200
1201         if (enabled_backends_count != num_backends)
1202                 num_backends = enabled_backends_count;
1203
1204         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1205         case CHIP_RV770:
1206         case CHIP_RV730:
1207                 force_no_swizzle = false;
1208                 break;
1209         case CHIP_RV710:
1210         case CHIP_RV740:
1211         default:
1212                 force_no_swizzle = true;
1213                 break;
1214         }
1215
1216         memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
1217         switch (num_tile_pipes) {
1218         case 1:
1219                 swizzle_pipe[0] = 0;
1220                 break;
1221         case 2:
1222                 swizzle_pipe[0] = 0;
1223                 swizzle_pipe[1] = 1;
1224                 break;
1225         case 3:
1226                 if (force_no_swizzle) {
1227                         swizzle_pipe[0] = 0;
1228                         swizzle_pipe[1] = 1;
1229                         swizzle_pipe[2] = 2;
1230                 } else {
1231                         swizzle_pipe[0] = 0;
1232                         swizzle_pipe[1] = 2;
1233                         swizzle_pipe[2] = 1;
1234                 }
1235                 break;
1236         case 4:
1237                 if (force_no_swizzle) {
1238                         swizzle_pipe[0] = 0;
1239                         swizzle_pipe[1] = 1;
1240                         swizzle_pipe[2] = 2;
1241                         swizzle_pipe[3] = 3;
1242                 } else {
1243                         swizzle_pipe[0] = 0;
1244                         swizzle_pipe[1] = 2;
1245                         swizzle_pipe[2] = 3;
1246                         swizzle_pipe[3] = 1;
1247                 }
1248                 break;
1249         case 5:
1250                 if (force_no_swizzle) {
1251                         swizzle_pipe[0] = 0;
1252                         swizzle_pipe[1] = 1;
1253                         swizzle_pipe[2] = 2;
1254                         swizzle_pipe[3] = 3;
1255                         swizzle_pipe[4] = 4;
1256                 } else {
1257                         swizzle_pipe[0] = 0;
1258                         swizzle_pipe[1] = 2;
1259                         swizzle_pipe[2] = 4;
1260                         swizzle_pipe[3] = 1;
1261                         swizzle_pipe[4] = 3;
1262                 }
1263                 break;
1264         case 6:
1265                 if (force_no_swizzle) {
1266                         swizzle_pipe[0] = 0;
1267                         swizzle_pipe[1] = 1;
1268                         swizzle_pipe[2] = 2;
1269                         swizzle_pipe[3] = 3;
1270                         swizzle_pipe[4] = 4;
1271                         swizzle_pipe[5] = 5;
1272                 } else {
1273                         swizzle_pipe[0] = 0;
1274                         swizzle_pipe[1] = 2;
1275                         swizzle_pipe[2] = 4;
1276                         swizzle_pipe[3] = 5;
1277                         swizzle_pipe[4] = 3;
1278                         swizzle_pipe[5] = 1;
1279                 }
1280                 break;
1281         case 7:
1282                 if (force_no_swizzle) {
1283                         swizzle_pipe[0] = 0;
1284                         swizzle_pipe[1] = 1;
1285                         swizzle_pipe[2] = 2;
1286                         swizzle_pipe[3] = 3;
1287                         swizzle_pipe[4] = 4;
1288                         swizzle_pipe[5] = 5;
1289                         swizzle_pipe[6] = 6;
1290                 } else {
1291                         swizzle_pipe[0] = 0;
1292                         swizzle_pipe[1] = 2;
1293                         swizzle_pipe[2] = 4;
1294                         swizzle_pipe[3] = 6;
1295                         swizzle_pipe[4] = 3;
1296                         swizzle_pipe[5] = 1;
1297                         swizzle_pipe[6] = 5;
1298                 }
1299                 break;
1300         case 8:
1301                 if (force_no_swizzle) {
1302                         swizzle_pipe[0] = 0;
1303                         swizzle_pipe[1] = 1;
1304                         swizzle_pipe[2] = 2;
1305                         swizzle_pipe[3] = 3;
1306                         swizzle_pipe[4] = 4;
1307                         swizzle_pipe[5] = 5;
1308                         swizzle_pipe[6] = 6;
1309                         swizzle_pipe[7] = 7;
1310                 } else {
1311                         swizzle_pipe[0] = 0;
1312                         swizzle_pipe[1] = 2;
1313                         swizzle_pipe[2] = 4;
1314                         swizzle_pipe[3] = 6;
1315                         swizzle_pipe[4] = 3;
1316                         swizzle_pipe[5] = 1;
1317                         swizzle_pipe[6] = 7;
1318                         swizzle_pipe[7] = 5;
1319                 }
1320                 break;
1321         }
1322
1323         cur_backend = 0;
1324         for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
1325                 while (((1 << cur_backend) & enabled_backends_mask) == 0)
1326                         cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1327
1328                 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
1329
1330                 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1331         }
1332
1333         return backend_map;
1334 }
1335
1336 static void r700_gfx_init(struct drm_device *dev,
1337                           drm_radeon_private_t *dev_priv)
1338 {
1339         int i, j, num_qd_pipes;
1340         u32 ta_aux_cntl;
1341         u32 sx_debug_1;
1342         u32 smx_dc_ctl0;
1343         u32 db_debug3;
1344         u32 num_gs_verts_per_thread;
1345         u32 vgt_gs_per_es;
1346         u32 gs_prim_buffer_depth = 0;
1347         u32 sq_ms_fifo_sizes;
1348         u32 sq_config;
1349         u32 sq_thread_resource_mgmt;
1350         u32 hdp_host_path_cntl;
1351         u32 sq_dyn_gpr_size_simd_ab_0;
1352         u32 backend_map;
1353         u32 gb_tiling_config = 0;
1354         u32 cc_rb_backend_disable;
1355         u32 cc_gc_shader_pipe_config;
1356         u32 mc_arb_ramcfg;
1357         u32 db_debug4;
1358
1359         /* setup chip specs */
1360         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1361         case CHIP_RV770:
1362                 dev_priv->r600_max_pipes = 4;
1363                 dev_priv->r600_max_tile_pipes = 8;
1364                 dev_priv->r600_max_simds = 10;
1365                 dev_priv->r600_max_backends = 4;
1366                 dev_priv->r600_max_gprs = 256;
1367                 dev_priv->r600_max_threads = 248;
1368                 dev_priv->r600_max_stack_entries = 512;
1369                 dev_priv->r600_max_hw_contexts = 8;
1370                 dev_priv->r600_max_gs_threads = 16 * 2;
1371                 dev_priv->r600_sx_max_export_size = 128;
1372                 dev_priv->r600_sx_max_export_pos_size = 16;
1373                 dev_priv->r600_sx_max_export_smx_size = 112;
1374                 dev_priv->r600_sq_num_cf_insts = 2;
1375
1376                 dev_priv->r700_sx_num_of_sets = 7;
1377                 dev_priv->r700_sc_prim_fifo_size = 0xF9;
1378                 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1379                 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1380                 break;
1381         case CHIP_RV730:
1382                 dev_priv->r600_max_pipes = 2;
1383                 dev_priv->r600_max_tile_pipes = 4;
1384                 dev_priv->r600_max_simds = 8;
1385                 dev_priv->r600_max_backends = 2;
1386                 dev_priv->r600_max_gprs = 128;
1387                 dev_priv->r600_max_threads = 248;
1388                 dev_priv->r600_max_stack_entries = 256;
1389                 dev_priv->r600_max_hw_contexts = 8;
1390                 dev_priv->r600_max_gs_threads = 16 * 2;
1391                 dev_priv->r600_sx_max_export_size = 256;
1392                 dev_priv->r600_sx_max_export_pos_size = 32;
1393                 dev_priv->r600_sx_max_export_smx_size = 224;
1394                 dev_priv->r600_sq_num_cf_insts = 2;
1395
1396                 dev_priv->r700_sx_num_of_sets = 7;
1397                 dev_priv->r700_sc_prim_fifo_size = 0xf9;
1398                 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1399                 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1400                 if (dev_priv->r600_sx_max_export_pos_size > 16) {
1401                         dev_priv->r600_sx_max_export_pos_size -= 16;
1402                         dev_priv->r600_sx_max_export_smx_size += 16;
1403                 }
1404                 break;
1405         case CHIP_RV710:
1406                 dev_priv->r600_max_pipes = 2;
1407                 dev_priv->r600_max_tile_pipes = 2;
1408                 dev_priv->r600_max_simds = 2;
1409                 dev_priv->r600_max_backends = 1;
1410                 dev_priv->r600_max_gprs = 256;
1411                 dev_priv->r600_max_threads = 192;
1412                 dev_priv->r600_max_stack_entries = 256;
1413                 dev_priv->r600_max_hw_contexts = 4;
1414                 dev_priv->r600_max_gs_threads = 8 * 2;
1415                 dev_priv->r600_sx_max_export_size = 128;
1416                 dev_priv->r600_sx_max_export_pos_size = 16;
1417                 dev_priv->r600_sx_max_export_smx_size = 112;
1418                 dev_priv->r600_sq_num_cf_insts = 1;
1419
1420                 dev_priv->r700_sx_num_of_sets = 7;
1421                 dev_priv->r700_sc_prim_fifo_size = 0x40;
1422                 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1423                 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1424                 break;
1425         case CHIP_RV740:
1426                 dev_priv->r600_max_pipes = 4;
1427                 dev_priv->r600_max_tile_pipes = 4;
1428                 dev_priv->r600_max_simds = 8;
1429                 dev_priv->r600_max_backends = 4;
1430                 dev_priv->r600_max_gprs = 256;
1431                 dev_priv->r600_max_threads = 248;
1432                 dev_priv->r600_max_stack_entries = 512;
1433                 dev_priv->r600_max_hw_contexts = 8;
1434                 dev_priv->r600_max_gs_threads = 16 * 2;
1435                 dev_priv->r600_sx_max_export_size = 256;
1436                 dev_priv->r600_sx_max_export_pos_size = 32;
1437                 dev_priv->r600_sx_max_export_smx_size = 224;
1438                 dev_priv->r600_sq_num_cf_insts = 2;
1439
1440                 dev_priv->r700_sx_num_of_sets = 7;
1441                 dev_priv->r700_sc_prim_fifo_size = 0x100;
1442                 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1443                 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1444
1445                 if (dev_priv->r600_sx_max_export_pos_size > 16) {
1446                         dev_priv->r600_sx_max_export_pos_size -= 16;
1447                         dev_priv->r600_sx_max_export_smx_size += 16;
1448                 }
1449                 break;
1450         default:
1451                 break;
1452         }
1453
1454         /* Initialize HDP */
1455         j = 0;
1456         for (i = 0; i < 32; i++) {
1457                 RADEON_WRITE((0x2c14 + j), 0x00000000);
1458                 RADEON_WRITE((0x2c18 + j), 0x00000000);
1459                 RADEON_WRITE((0x2c1c + j), 0x00000000);
1460                 RADEON_WRITE((0x2c20 + j), 0x00000000);
1461                 RADEON_WRITE((0x2c24 + j), 0x00000000);
1462                 j += 0x18;
1463         }
1464
1465         RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
1466
1467         /* setup tiling, simd, pipe config */
1468         mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);
1469
1470         switch (dev_priv->r600_max_tile_pipes) {
1471         case 1:
1472                 gb_tiling_config |= R600_PIPE_TILING(0);
1473                 break;
1474         case 2:
1475                 gb_tiling_config |= R600_PIPE_TILING(1);
1476                 break;
1477         case 4:
1478                 gb_tiling_config |= R600_PIPE_TILING(2);
1479                 break;
1480         case 8:
1481                 gb_tiling_config |= R600_PIPE_TILING(3);
1482                 break;
1483         default:
1484                 break;
1485         }
1486
1487         if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
1488                 gb_tiling_config |= R600_BANK_TILING(1);
1489         else
1490                 gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);
1491
1492         gb_tiling_config |= R600_GROUP_SIZE(0);
1493
1494         if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {
1495                 gb_tiling_config |= R600_ROW_TILING(3);
1496                 gb_tiling_config |= R600_SAMPLE_SPLIT(3);
1497         } else {
1498                 gb_tiling_config |=
1499                         R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
1500                 gb_tiling_config |=
1501                         R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
1502         }
1503
1504         gb_tiling_config |= R600_BANK_SWAPS(1);
1505
1506         cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
1507         cc_rb_backend_disable |=
1508                 R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);
1509
1510         cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
1511         cc_gc_shader_pipe_config |=
1512                 R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
1513         cc_gc_shader_pipe_config |=
1514                 R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);
1515
1516         if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)
1517                 backend_map = 0x28;
1518         else
1519                 backend_map = r700_get_tile_pipe_to_backend_map(dev_priv,
1520                                                                 dev_priv->r600_max_tile_pipes,
1521                                                                 (R7XX_MAX_BACKENDS -
1522                                                                  r600_count_pipe_bits((cc_rb_backend_disable &
1523                                                                                        R7XX_MAX_BACKENDS_MASK) >> 16)),
1524                                                                 (cc_rb_backend_disable >> 16));
1525         gb_tiling_config |= R600_BACKEND_MAP(backend_map);
1526
1527         RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
1528         RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
1529         RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
1530         if (gb_tiling_config & 0xc0) {
1531                 dev_priv->r600_group_size = 512;
1532         } else {
1533                 dev_priv->r600_group_size = 256;
1534         }
1535         dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
1536         if (gb_tiling_config & 0x30) {
1537                 dev_priv->r600_nbanks = 8;
1538         } else {
1539                 dev_priv->r600_nbanks = 4;
1540         }
1541
1542         RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
1543         RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
1544         RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
1545
1546         RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1547         RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
1548         RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
1549         RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
1550         RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);
1551
1552         num_qd_pipes =
1553                 R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
1554         RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
1555         RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
1556
1557         /* set HW defaults for 3D engine */
1558         RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
1559                                                 R600_ROQ_IB2_START(0x2b)));
1560
1561         RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));
1562
1563         ta_aux_cntl = RADEON_READ(R600_TA_CNTL_AUX);
1564         RADEON_WRITE(R600_TA_CNTL_AUX, ta_aux_cntl | R600_DISABLE_CUBE_ANISO);
1565
1566         sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
1567         sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
1568         RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);
1569
1570         smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);
1571         smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);
1572         smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
1573         RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);
1574
1575         if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV740)
1576                 RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
1577                                                   R700_GS_FLUSH_CTL(4) |
1578                                                   R700_ACK_FLUSH_CTL(3) |
1579                                                   R700_SYNC_FLUSH_CTL));
1580
1581         db_debug3 = RADEON_READ(R700_DB_DEBUG3);
1582         db_debug3 &= ~R700_DB_CLK_OFF_DELAY(0x1f);
1583         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1584         case CHIP_RV770:
1585         case CHIP_RV740:
1586                 db_debug3 |= R700_DB_CLK_OFF_DELAY(0x1f);
1587                 break;
1588         case CHIP_RV710:
1589         case CHIP_RV730:
1590         default:
1591                 db_debug3 |= R700_DB_CLK_OFF_DELAY(2);
1592                 break;
1593         }
1594         RADEON_WRITE(R700_DB_DEBUG3, db_debug3);
1595
1596         if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV770) {
1597                 db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
1598                 db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
1599                 RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
1600         }
1601
1602         RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |
1603                                                    R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |
1604                                                    R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));
1605
1606         RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |
1607                                                  R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |
1608                                                  R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));
1609
1610         RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1611
1612         RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);
1613
1614         RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
1615
1616         RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));
1617
1618         RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);
1619
1620         sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |
1621                             R600_DONE_FIFO_HIWATER(0xe0) |
1622                             R600_ALU_UPDATE_FIFO_HIWATER(0x8));
1623         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1624         case CHIP_RV770:
1625         case CHIP_RV730:
1626         case CHIP_RV710:
1627                 sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
1628                 break;
1629         case CHIP_RV740:
1630         default:
1631                 sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
1632                 break;
1633         }
1634         RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
1635
1636         /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
1637          * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
1638          */
1639         sq_config = RADEON_READ(R600_SQ_CONFIG);
1640         sq_config &= ~(R600_PS_PRIO(3) |
1641                        R600_VS_PRIO(3) |
1642                        R600_GS_PRIO(3) |
1643                        R600_ES_PRIO(3));
1644         sq_config |= (R600_DX9_CONSTS |
1645                       R600_VC_ENABLE |
1646                       R600_EXPORT_SRC_C |
1647                       R600_PS_PRIO(0) |
1648                       R600_VS_PRIO(1) |
1649                       R600_GS_PRIO(2) |
1650                       R600_ES_PRIO(3));
1651         if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
1652                 /* no vertex cache */
1653                 sq_config &= ~R600_VC_ENABLE;
1654
1655         RADEON_WRITE(R600_SQ_CONFIG, sq_config);
1656
1657         RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
1658                                                     R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
1659                                                     R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));
1660
1661         RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |
1662                                                     R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));
1663
1664         sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |
1665                                    R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |
1666                                    R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));
1667         if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)
1668                 sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);
1669         else
1670                 sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);
1671         RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
1672
1673         RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
1674                                                      R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
1675
1676         RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
1677                                                      R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
1678
1679         sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |
1680                                      R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |
1681                                      R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |
1682                                      R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));
1683
1684         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
1685         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
1686         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
1687         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
1688         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
1689         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
1690         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
1691         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
1692
1693         RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |
1694                                                      R700_FORCE_EOV_MAX_REZ_CNT(255)));
1695
1696         if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
1697                 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |
1698                                                            R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
1699         else
1700                 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |
1701                                                            R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
1702
1703         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1704         case CHIP_RV770:
1705         case CHIP_RV730:
1706         case CHIP_RV740:
1707                 gs_prim_buffer_depth = 384;
1708                 break;
1709         case CHIP_RV710:
1710                 gs_prim_buffer_depth = 128;
1711                 break;
1712         default:
1713                 break;
1714         }
1715
1716         num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
1717         vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
1718         /* Max value for this is 256 */
1719         if (vgt_gs_per_es > 256)
1720                 vgt_gs_per_es = 256;
1721
1722         RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
1723         RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
1724         RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
1725
1726         /* more default values. 2D/3D driver should adjust as needed */
1727         RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
1728         RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
1729         RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
1730         RADEON_WRITE(R600_SX_MISC, 0);
1731         RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
1732         RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);
1733         RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
1734         RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);
1735         RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
1736         RADEON_WRITE(R600_SPI_INPUT_Z, 0);
1737         RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
1738         RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
1739
1740         /* clear render buffer base addresses */
1741         RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
1742         RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
1743         RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
1744         RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
1745         RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
1746         RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
1747         RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
1748         RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
1749
1750         RADEON_WRITE(R700_TCP_CNTL, 0);
1751
1752         hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
1753         RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1754
1755         RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1756
1757         RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
1758                                           R600_NUM_CLIP_SEQ(3)));
1759
1760 }
1761
/*
 * Bring up the CP (command processor) ring buffer.
 *
 * Sequence: run the per-family GFX init, soft-reset the CP, program the
 * ring size/control register, zero the read/write pointers, point the
 * hardware at the GART bus address of the read-pointer writeback area,
 * park the scratch-register writeback right behind it, enable bus
 * mastering, and clear the last-frame/dispatch/clear scratch values and
 * their sarea mirrors.  Finally waits for the engine to go idle.
 *
 * The MMIO writes below are order-sensitive hardware programming; do not
 * reorder them.
 */
static void r600_cp_init_ring_buffer(struct drm_device *dev,
				       drm_radeon_private_t *dev_priv,
				       struct drm_file *file_priv)
{
	struct drm_radeon_master_private *master_priv;
	u32 ring_start;
	u64 rptr_addr;

	/* Family-specific 3D engine setup (R7xx vs R6xx). */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
		r700_gfx_init(dev, dev_priv);
	else
		r600_gfx_init(dev, dev_priv);

	/* Soft-reset the CP; the read flushes the posted write, and the
	 * delay gives the block time to settle before releasing reset. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	mdelay(15);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);


	/* Set ring buffer size */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     R600_BUF_SWAP_32BIT |
		     R600_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	/* NOTE(review): this branch uses RADEON_RB_NO_UPDATE while the
	 * big-endian branch uses R600_RB_NO_UPDATE — presumably the same
	 * bit; confirm against the register definitions in the headers. */
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x0);

	/* Set the write pointer delay */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);

	/* Temporarily enable RPTR_WR so the read pointer can be forced
	 * to zero below; cleared again once the pointers are reset. */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     R600_BUF_SWAP_32BIT |
		     R600_RB_NO_UPDATE |
		     R600_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     R600_RB_NO_UPDATE |
		     R600_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* Initialize the ring buffer's read and write pointers */
	RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);
	RADEON_WRITE(R600_CP_RB_WPTR, 0);
	SET_RING_HEAD(dev_priv, 0);
	dev_priv->ring.tail = 0;

	/* Compute the GART bus address of the read-pointer writeback
	 * area: offset relative to the AGP aperture or the scatter/gather
	 * mapping, rebased into the card's GART VM window. */
#if IS_ENABLED(CONFIG_AGP)
	if (dev_priv->flags & RADEON_IS_AGP) {
		rptr_addr = dev_priv->ring_rptr->offset
			- dev->agp->base +
			dev_priv->gart_vm_start;
	} else
#endif
	{
		rptr_addr = dev_priv->ring_rptr->offset
			- ((unsigned long) dev->sg->virtual)
			+ dev_priv->gart_vm_start;
	}
	/* Low dword must be 4-byte aligned; high dword carries bits 32+. */
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR, (rptr_addr & 0xfffffffc));
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI, upper_32_bits(rptr_addr));

	/* Final RB_CNTL value: writeback enabled (no NO_UPDATE bit),
	 * RPTR_WR disabled again. */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* Program the AGP aperture and locate the ring base in the GART
	 * VM window (AGP path) or relative to the SG mapping (PCI path). */
#if IS_ENABLED(CONFIG_AGP)
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* XXX */
		radeon_write_agp_base(dev_priv, dev->agp->base);

		/* XXX */
		radeon_write_agp_location(dev_priv,
			     (((dev_priv->gart_vm_start - 1 +
				dev_priv->gart_size) & 0xffff0000) |
			      (dev_priv->gart_vm_start >> 16)));

		ring_start = (dev_priv->cp_ring->offset
			      - dev->agp->base
			      + dev_priv->gart_vm_start);
	} else
#endif
		ring_start = (dev_priv->cp_ring->offset
			      - (unsigned long)dev->sg->virtual
			      + dev_priv->gart_vm_start);

	/* RB_BASE takes a 256-byte-aligned address (hence the >> 8). */
	RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);

	RADEON_WRITE(R600_CP_ME_CNTL, 0xff);

	RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));

	/* Initialize the scratch register pointer.  This will cause
	 * the scratch register values to be written out to memory
	 * whenever they are updated.
	 *
	 * We simply put this behind the ring read pointer, this works
	 * with PCI GART as well as (whatever kind of) AGP GART
	 */
	{
		u64 scratch_addr;

		/* Reassemble the 64-bit rptr address programmed above,
		 * offset past it for the scratch area, and convert to the
		 * 256-byte-granular form SCRATCH_ADDR expects. */
		scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR) & 0xFFFFFFFC;
		scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
		scratch_addr += R600_SCRATCH_REG_OFFSET;
		scratch_addr >>= 8;
		scratch_addr &= 0xffffffff;

		RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);
	}

	/* Enable writeback for scratch registers 0-2. */
	RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);

	/* Turn on bus mastering */
	radeon_enable_bm(dev_priv);

	/* Zero the three tracked scratch values both in the writeback
	 * area and in the corresponding MMIO registers. */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0);
	RADEON_WRITE(R600_LAST_FRAME_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
	RADEON_WRITE(R600_LAST_DISPATCH_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0);
	RADEON_WRITE(R600_LAST_CLEAR_REG, 0);

	/* reset sarea copies of these */
	master_priv = file_priv->master->driver_priv;
	if (master_priv->sarea_priv) {
		master_priv->sarea_priv->last_frame = 0;
		master_priv->sarea_priv->last_dispatch = 0;
		master_priv->sarea_priv->last_clear = 0;
	}

	r600_do_wait_for_idle(dev_priv);

}
1917
1918 int r600_do_cleanup_cp(struct drm_device *dev)
1919 {
1920         drm_radeon_private_t *dev_priv = dev->dev_private;
1921         DRM_DEBUG("\n");
1922
1923         /* Make sure interrupts are disabled here because the uninstall ioctl
1924          * may not have been called from userspace and after dev_private
1925          * is freed, it's too late.
1926          */
1927         if (dev->irq_enabled)
1928                 drm_irq_uninstall(dev);
1929
1930 #if IS_ENABLED(CONFIG_AGP)
1931         if (dev_priv->flags & RADEON_IS_AGP) {
1932                 if (dev_priv->cp_ring != NULL) {
1933                         drm_legacy_ioremapfree(dev_priv->cp_ring, dev);
1934                         dev_priv->cp_ring = NULL;
1935                 }
1936                 if (dev_priv->ring_rptr != NULL) {
1937                         drm_legacy_ioremapfree(dev_priv->ring_rptr, dev);
1938                         dev_priv->ring_rptr = NULL;
1939                 }
1940                 if (dev->agp_buffer_map != NULL) {
1941                         drm_legacy_ioremapfree(dev->agp_buffer_map, dev);
1942                         dev->agp_buffer_map = NULL;
1943                 }
1944         } else
1945 #endif
1946         {
1947
1948                 if (dev_priv->gart_info.bus_addr)
1949                         r600_page_table_cleanup(dev, &dev_priv->gart_info);
1950
1951                 if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) {
1952                         drm_legacy_ioremapfree(&dev_priv->gart_info.mapping, dev);
1953                         dev_priv->gart_info.addr = NULL;
1954                 }
1955         }
1956         /* only clear to the start of flags */
1957         memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));
1958
1959         return 0;
1960 }
1961
1962 int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
1963                     struct drm_file *file_priv)
1964 {
1965         drm_radeon_private_t *dev_priv = dev->dev_private;
1966         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1967
1968         DRM_DEBUG("\n");
1969
1970         mutex_init(&dev_priv->cs_mutex);
1971         r600_cs_legacy_init();
1972         /* if we require new memory map but we don't have it fail */
1973         if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
1974                 DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
1975                 r600_do_cleanup_cp(dev);
1976                 return -EINVAL;
1977         }
1978
1979         if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) {
1980                 DRM_DEBUG("Forcing AGP card to PCI mode\n");
1981                 dev_priv->flags &= ~RADEON_IS_AGP;
1982                 /* The writeback test succeeds, but when writeback is enabled,
1983                  * the ring buffer read ptr update fails after first 128 bytes.
1984                  */
1985                 radeon_no_wb = 1;
1986         } else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))
1987                  && !init->is_pci) {
1988                 DRM_DEBUG("Restoring AGP flag\n");
1989                 dev_priv->flags |= RADEON_IS_AGP;
1990         }
1991
1992         dev_priv->usec_timeout = init->usec_timeout;
1993         if (dev_priv->usec_timeout < 1 ||
1994             dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {
1995                 DRM_DEBUG("TIMEOUT problem!\n");
1996                 r600_do_cleanup_cp(dev);
1997                 return -EINVAL;
1998         }
1999
2000         /* Enable vblank on CRTC1 for older X servers
2001          */
2002         dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
2003         dev_priv->do_boxes = 0;
2004         dev_priv->cp_mode = init->cp_mode;
2005
2006         /* We don't support anything other than bus-mastering ring mode,
2007          * but the ring can be in either AGP or PCI space for the ring
2008          * read pointer.
2009          */
2010         if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&
2011             (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {
2012                 DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);
2013                 r600_do_cleanup_cp(dev);
2014                 return -EINVAL;
2015         }
2016
2017         switch (init->fb_bpp) {
2018         case 16:
2019                 dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;
2020                 break;
2021         case 32:
2022         default:
2023                 dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
2024                 break;
2025         }
2026         dev_priv->front_offset = init->front_offset;
2027         dev_priv->front_pitch = init->front_pitch;
2028         dev_priv->back_offset = init->back_offset;
2029         dev_priv->back_pitch = init->back_pitch;
2030
2031         dev_priv->ring_offset = init->ring_offset;
2032         dev_priv->ring_rptr_offset = init->ring_rptr_offset;
2033         dev_priv->buffers_offset = init->buffers_offset;
2034         dev_priv->gart_textures_offset = init->gart_textures_offset;
2035
2036         master_priv->sarea = drm_legacy_getsarea(dev);
2037         if (!master_priv->sarea) {
2038                 DRM_ERROR("could not find sarea!\n");
2039                 r600_do_cleanup_cp(dev);
2040                 return -EINVAL;
2041         }
2042
2043         dev_priv->cp_ring = drm_legacy_findmap(dev, init->ring_offset);
2044         if (!dev_priv->cp_ring) {
2045                 DRM_ERROR("could not find cp ring region!\n");
2046                 r600_do_cleanup_cp(dev);
2047                 return -EINVAL;
2048         }
2049         dev_priv->ring_rptr = drm_legacy_findmap(dev, init->ring_rptr_offset);
2050         if (!dev_priv->ring_rptr) {
2051                 DRM_ERROR("could not find ring read pointer!\n");
2052                 r600_do_cleanup_cp(dev);
2053                 return -EINVAL;
2054         }
2055         dev->agp_buffer_token = init->buffers_offset;
2056         dev->agp_buffer_map = drm_legacy_findmap(dev, init->buffers_offset);
2057         if (!dev->agp_buffer_map) {
2058                 DRM_ERROR("could not find dma buffer region!\n");
2059                 r600_do_cleanup_cp(dev);
2060                 return -EINVAL;
2061         }
2062
2063         if (init->gart_textures_offset) {
2064                 dev_priv->gart_textures =
2065                     drm_legacy_findmap(dev, init->gart_textures_offset);
2066                 if (!dev_priv->gart_textures) {
2067                         DRM_ERROR("could not find GART texture region!\n");
2068                         r600_do_cleanup_cp(dev);
2069                         return -EINVAL;
2070                 }
2071         }
2072
2073 #if IS_ENABLED(CONFIG_AGP)
2074         /* XXX */
2075         if (dev_priv->flags & RADEON_IS_AGP) {
2076                 drm_legacy_ioremap_wc(dev_priv->cp_ring, dev);
2077                 drm_legacy_ioremap_wc(dev_priv->ring_rptr, dev);
2078                 drm_legacy_ioremap_wc(dev->agp_buffer_map, dev);
2079                 if (!dev_priv->cp_ring->handle ||
2080                     !dev_priv->ring_rptr->handle ||
2081                     !dev->agp_buffer_map->handle) {
2082                         DRM_ERROR("could not find ioremap agp regions!\n");
2083                         r600_do_cleanup_cp(dev);
2084                         return -EINVAL;
2085                 }
2086         } else
2087 #endif
2088         {
2089                 dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset;
2090                 dev_priv->ring_rptr->handle =
2091                         (void *)(unsigned long)dev_priv->ring_rptr->offset;
2092                 dev->agp_buffer_map->handle =
2093                         (void *)(unsigned long)dev->agp_buffer_map->offset;
2094
2095                 DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
2096                           dev_priv->cp_ring->handle);
2097                 DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",
2098                           dev_priv->ring_rptr->handle);
2099                 DRM_DEBUG("dev->agp_buffer_map->handle %p\n",
2100                           dev->agp_buffer_map->handle);
2101         }
2102
2103         dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24;
2104         dev_priv->fb_size =
2105                 (((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000)
2106                 - dev_priv->fb_location;
2107
2108         dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
2109                                         ((dev_priv->front_offset
2110                                           + dev_priv->fb_location) >> 10));
2111
2112         dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |
2113                                        ((dev_priv->back_offset
2114                                          + dev_priv->fb_location) >> 10));
2115
2116         dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |
2117                                         ((dev_priv->depth_offset
2118                                           + dev_priv->fb_location) >> 10));
2119
2120         dev_priv->gart_size = init->gart_size;
2121
2122         /* New let's set the memory map ... */
2123         if (dev_priv->new_memmap) {
2124                 u32 base = 0;
2125
2126                 DRM_INFO("Setting GART location based on new memory map\n");
2127
2128                 /* If using AGP, try to locate the AGP aperture at the same
2129                  * location in the card and on the bus, though we have to
2130                  * align it down.
2131                  */
2132 #if IS_ENABLED(CONFIG_AGP)
2133                 /* XXX */
2134                 if (dev_priv->flags & RADEON_IS_AGP) {
2135                         base = dev->agp->base;
2136                         /* Check if valid */
2137                         if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&
2138                             base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {
2139                                 DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
2140                                          dev->agp->base);
2141                                 base = 0;
2142                         }
2143                 }
2144 #endif
2145                 /* If not or if AGP is at 0 (Macs), try to put it elsewhere */
2146                 if (base == 0) {
2147                         base = dev_priv->fb_location + dev_priv->fb_size;
2148                         if (base < dev_priv->fb_location ||
2149                             ((base + dev_priv->gart_size) & 0xfffffffful) < base)
2150                                 base = dev_priv->fb_location
2151                                         - dev_priv->gart_size;
2152                 }
2153                 dev_priv->gart_vm_start = base & 0xffc00000u;
2154                 if (dev_priv->gart_vm_start != base)
2155                         DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
2156                                  base, dev_priv->gart_vm_start);
2157         }
2158
2159 #if IS_ENABLED(CONFIG_AGP)
2160         /* XXX */
2161         if (dev_priv->flags & RADEON_IS_AGP)
2162                 dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
2163                                                  - dev->agp->base
2164                                                  + dev_priv->gart_vm_start);
2165         else
2166 #endif
2167                 dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
2168                                                  - (unsigned long)dev->sg->virtual
2169                                                  + dev_priv->gart_vm_start);
2170
2171         DRM_DEBUG("fb 0x%08x size %d\n",
2172                   (unsigned int) dev_priv->fb_location,
2173                   (unsigned int) dev_priv->fb_size);
2174         DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);
2175         DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n",
2176                   (unsigned int) dev_priv->gart_vm_start);
2177         DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n",
2178                   dev_priv->gart_buffers_offset);
2179
2180         dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;
2181         dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
2182                               + init->ring_size / sizeof(u32));
2183         dev_priv->ring.size = init->ring_size;
2184         dev_priv->ring.size_l2qw = order_base_2(init->ring_size / 8);
2185
2186         dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
2187         dev_priv->ring.rptr_update_l2qw = order_base_2(/* init->rptr_update */ 4096 / 8);
2188
2189         dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
2190         dev_priv->ring.fetch_size_l2ow = order_base_2(/* init->fetch_size */ 32 / 16);
2191
2192         dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
2193
2194         dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;
2195
2196 #if IS_ENABLED(CONFIG_AGP)
2197         if (dev_priv->flags & RADEON_IS_AGP) {
2198                 /* XXX turn off pcie gart */
2199         } else
2200 #endif
2201         {
2202                 dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);
2203                 /* if we have an offset set from userspace */
2204                 if (!dev_priv->pcigart_offset_set) {
2205                         DRM_ERROR("Need gart offset from userspace\n");
2206                         r600_do_cleanup_cp(dev);
2207                         return -EINVAL;
2208                 }
2209
2210                 DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset);
2211
2212                 dev_priv->gart_info.bus_addr =
2213                         dev_priv->pcigart_offset + dev_priv->fb_location;
2214                 dev_priv->gart_info.mapping.offset =
2215                         dev_priv->pcigart_offset + dev_priv->fb_aper_offset;
2216                 dev_priv->gart_info.mapping.size =
2217                         dev_priv->gart_info.table_size;
2218
2219                 drm_legacy_ioremap_wc(&dev_priv->gart_info.mapping, dev);
2220                 if (!dev_priv->gart_info.mapping.handle) {
2221                         DRM_ERROR("ioremap failed.\n");
2222                         r600_do_cleanup_cp(dev);
2223                         return -EINVAL;
2224                 }
2225
2226                 dev_priv->gart_info.addr =
2227                         dev_priv->gart_info.mapping.handle;
2228
2229                 DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",
2230                           dev_priv->gart_info.addr,
2231                           dev_priv->pcigart_offset);
2232
2233                 if (!r600_page_table_init(dev)) {
2234                         DRM_ERROR("Failed to init GART table\n");
2235                         r600_do_cleanup_cp(dev);
2236                         return -EINVAL;
2237                 }
2238
2239                 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
2240                         r700_vm_init(dev);
2241                 else
2242                         r600_vm_init(dev);
2243         }
2244
2245         if (!dev_priv->me_fw || !dev_priv->pfp_fw) {
2246                 int err = r600_cp_init_microcode(dev_priv);
2247                 if (err) {
2248                         DRM_ERROR("Failed to load firmware!\n");
2249                         r600_do_cleanup_cp(dev);
2250                         return err;
2251                 }
2252         }
2253         if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
2254                 r700_cp_load_microcode(dev_priv);
2255         else
2256                 r600_cp_load_microcode(dev_priv);
2257
2258         r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2259
2260         dev_priv->last_buf = 0;
2261
2262         r600_do_engine_reset(dev);
2263         r600_test_writeback(dev_priv);
2264
2265         return 0;
2266 }
2267
2268 int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)
2269 {
2270         drm_radeon_private_t *dev_priv = dev->dev_private;
2271
2272         DRM_DEBUG("\n");
2273         if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {
2274                 r700_vm_init(dev);
2275                 r700_cp_load_microcode(dev_priv);
2276         } else {
2277                 r600_vm_init(dev);
2278                 r600_cp_load_microcode(dev_priv);
2279         }
2280         r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2281         r600_do_engine_reset(dev);
2282
2283         return 0;
2284 }
2285
/* Wait for the CP to go idle.
 *
 * Emits a cache flush-and-invalidate event followed by a WAIT_UNTIL
 * config-register write requesting 3D idle-clean, commits the ring, and
 * then returns the result of r600_do_wait_for_idle().
 */
int r600_do_cp_idle(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(5);
	/* flush and invalidate caches before waiting */
	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
	/* wait for 3D idle clean */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
	/* register offset expressed in dwords relative to the config base */
	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);

	ADVANCE_RING();
	COMMIT_RING();

	return r600_do_wait_for_idle(dev_priv);
}
2306
/* Start the Command Processor.
 *
 * Emits a ME_INITIALIZE packet on the ring, then clears the halt bit in
 * CP_ME_CNTL so the micro engine begins fetching, and records the
 * running state in dev_priv->cp_running.
 */
void r600_do_cp_start(drm_radeon_private_t *dev_priv)
{
	u32 cp_me;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* ME_INITIALIZE: header plus 6 payload dwords (count field is 5) */
	BEGIN_RING(7);
	OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));
	OUT_RING(0x00000001);
	/* second dword differs by generation: 0x3 before RV770, 0x0 after —
	 * presumably a hardware-family flag for the ME microcode; see the
	 * PM4 ME_INITIALIZE documentation to confirm. */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770))
		OUT_RING(0x00000003);
	else
		OUT_RING(0x00000000);
	OUT_RING((dev_priv->r600_max_hw_contexts - 1));
	OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));
	OUT_RING(0x00000000);
	OUT_RING(0x00000000);
	ADVANCE_RING();
	COMMIT_RING();

	/* set the mux and reset the halt bit */
	cp_me = 0xff;
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);

	dev_priv->cp_running = 1;

}
2336
/* Software-reset the CP ring pointers: force the write pointer to match
 * the hardware's current read pointer so nothing stale is fetched, and
 * resynchronize the driver's cached head and tail to the same value.
 */
void r600_do_cp_reset(drm_radeon_private_t *dev_priv)
{
	u32 cur_read_ptr;
	DRM_DEBUG("\n");

	cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR);
	RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr);
	SET_RING_HEAD(dev_priv, cur_read_ptr);
	dev_priv->ring.tail = cur_read_ptr;
}
2347
2348 void r600_do_cp_stop(drm_radeon_private_t *dev_priv)
2349 {
2350         uint32_t cp_me;
2351
2352         DRM_DEBUG("\n");
2353
2354         cp_me = 0xff | R600_CP_ME_HALT;
2355
2356         RADEON_WRITE(R600_CP_ME_CNTL, cp_me);
2357
2358         dev_priv->cp_running = 0;
2359 }
2360
/* Dispatch the byte range [start, end) of @buf as an indirect buffer.
 *
 * The range is rounded up to whole dwords, padded with Type-2 no-op
 * packets to a 16-dword multiple (a hardware requirement for IBs), and
 * then an INDIRECT_BUFFER packet pointing at its GART bus address is
 * emitted on the ring.  An empty range (start == end) emits nothing.
 * Always returns 0.
 */
int r600_cp_dispatch_indirect(struct drm_device *dev,
			      struct drm_buf *buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	if (start != end) {
		/* GART bus address of the IB data */
		unsigned long offset = (dev_priv->gart_buffers_offset
					+ buf->offset + start);
		/* byte length rounded up to whole dwords */
		int dwords = (end - start + 3) / sizeof(u32);

		DRM_DEBUG("dwords:%d\n", dwords);
		DRM_DEBUG("offset 0x%lx\n", offset);


		/* Indirect buffer data must be a multiple of 16 dwords.
		 * pad the data with a Type-2 CP packet.
		 */
		while (dwords & 0xf) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(4);
		OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));
		/* low 32 bits of the address, forced dword-aligned */
		OUT_RING((offset & 0xfffffffc));
		/* upper 8 address bits (40-bit GPU address space) */
		OUT_RING((upper_32_bits(offset) & 0xff));
		OUT_RING(dwords);
		ADVANCE_RING();
	}

	return 0;
}
2397
/* Swap-buffer blit: copy every SAREA clip rectangle from the back buffer
 * to the front buffer (directions reversed when page flipping has made
 * the other buffer current), then emit an updated frame-age value that
 * clients use to throttle their frame rate.
 */
void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_master *master = file_priv->master;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	int i, cpp, src_pitch, dst_pitch;
	uint64_t src, dst;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* bytes per pixel: 4 for ARGB8888, otherwise treated as 16bpp */
	if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888)
		cpp = 4;
	else
		cpp = 2;

	/* pfCurrentPage selects which buffer is currently front */
	if (sarea_priv->pfCurrentPage == 0) {
		src_pitch = dev_priv->back_pitch;
		dst_pitch = dev_priv->front_pitch;
		src = dev_priv->back_offset + dev_priv->fb_location;
		dst = dev_priv->front_offset + dev_priv->fb_location;
	} else {
		src_pitch = dev_priv->front_pitch;
		dst_pitch = dev_priv->back_pitch;
		src = dev_priv->front_offset + dev_priv->fb_location;
		dst = dev_priv->back_offset + dev_priv->fb_location;
	}

	if (r600_prepare_blit_copy(dev, file_priv)) {
		DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
		return;
	}
	/* blit each client clip rectangle source->destination in place */
	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);

		r600_blit_swap(dev,
			       src, dst,
			       x, y, x, y, w, h,
			       src_pitch, dst_pitch, cpp);
	}
	r600_done_blit_copy(dev);

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	sarea_priv->last_frame++;

	BEGIN_RING(3);
	R600_FRAME_AGE(sarea_priv->last_frame);
	ADVANCE_RING();
}
2457
2458 int r600_cp_dispatch_texture(struct drm_device *dev,
2459                              struct drm_file *file_priv,
2460                              drm_radeon_texture_t *tex,
2461                              drm_radeon_tex_image_t *image)
2462 {
2463         drm_radeon_private_t *dev_priv = dev->dev_private;
2464         struct drm_buf *buf;
2465         u32 *buffer;
2466         const u8 __user *data;
2467         unsigned int size, pass_size;
2468         u64 src_offset, dst_offset;
2469
2470         if (!radeon_check_offset(dev_priv, tex->offset)) {
2471                 DRM_ERROR("Invalid destination offset\n");
2472                 return -EINVAL;
2473         }
2474
2475         /* this might fail for zero-sized uploads - are those illegal? */
2476         if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {
2477                 DRM_ERROR("Invalid final destination offset\n");
2478                 return -EINVAL;
2479         }
2480
2481         size = tex->height * tex->pitch;
2482
2483         if (size == 0)
2484                 return 0;
2485
2486         dst_offset = tex->offset;
2487
2488         if (r600_prepare_blit_copy(dev, file_priv)) {
2489                 DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
2490                 return -EAGAIN;
2491         }
2492         do {
2493                 data = (const u8 __user *)image->data;
2494                 pass_size = size;
2495
2496                 buf = radeon_freelist_get(dev);
2497                 if (!buf) {
2498                         DRM_DEBUG("EAGAIN\n");
2499                         if (copy_to_user(tex->image, image, sizeof(*image)))
2500                                 return -EFAULT;
2501                         return -EAGAIN;
2502                 }
2503
2504                 if (pass_size > buf->total)
2505                         pass_size = buf->total;
2506
2507                 /* Dispatch the indirect buffer.
2508                  */
2509                 buffer =
2510                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
2511
2512                 if (copy_from_user(buffer, data, pass_size)) {
2513                         DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);
2514                         return -EFAULT;
2515                 }
2516
2517                 buf->file_priv = file_priv;
2518                 buf->used = pass_size;
2519                 src_offset = dev_priv->gart_buffers_offset + buf->offset;
2520
2521                 r600_blit_copy(dev, src_offset, dst_offset, pass_size);
2522
2523                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2524
2525                 /* Update the input parameters for next time */
2526                 image->data = (const u8 __user *)image->data + pass_size;
2527                 dst_offset += pass_size;
2528                 size -= pass_size;
2529         } while (size > 0);
2530         r600_done_blit_copy(dev);
2531
2532         return 0;
2533 }
2534
2535 /*
2536  * Legacy cs ioctl
2537  */
2538 static u32 radeon_cs_id_get(struct drm_radeon_private *radeon)
2539 {
2540         /* FIXME: check if wrap affect last reported wrap & sequence */
2541         radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF;
2542         if (!radeon->cs_id_scnt) {
2543                 /* increment wrap counter */
2544                 radeon->cs_id_wcnt += 0x01000000;
2545                 /* valid sequence counter start at 1 */
2546                 radeon->cs_id_scnt = 1;
2547         }
2548         return (radeon->cs_id_scnt | radeon->cs_id_wcnt);
2549 }
2550
/* Allocate the next command-stream id and emit it through the ring so
 * that user space can observe command-stream completion via the age
 * scratch register.  *id receives the value that was emitted.
 */
static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id)
{
	RING_LOCALS;

	*id = radeon_cs_id_get(dev_priv);

	/* SCRATCH 2 */
	BEGIN_RING(3);
	R600_CLEAR_AGE(*id);
	ADVANCE_RING();
	COMMIT_RING();
}
2563
2564 static int r600_ib_get(struct drm_device *dev,
2565                         struct drm_file *fpriv,
2566                         struct drm_buf **buffer)
2567 {
2568         struct drm_buf *buf;
2569
2570         *buffer = NULL;
2571         buf = radeon_freelist_get(dev);
2572         if (!buf) {
2573                 return -EBUSY;
2574         }
2575         buf->file_priv = fpriv;
2576         *buffer = buf;
2577         return 0;
2578 }
2579
/* Finish with an indirect buffer: when the parse succeeded (@r == 0),
 * dispatch its first @l dwords (l * 4 bytes), then discard the buffer
 * back to the freelist and commit the ring.  A NULL @buf is a no-op.
 */
static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf,
			struct drm_file *fpriv, int l, int r)
{
	/* dev_priv looks unused but is referenced by the COMMIT_RING() macro */
	drm_radeon_private_t *dev_priv = dev->dev_private;

	if (buf) {
		if (!r)
			r600_cp_dispatch_indirect(dev, buf, 0, l * 4);
		radeon_cp_discard_buffer(dev, fpriv->master, buf);
		COMMIT_RING();
	}
}
2592
2593 int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
2594 {
2595         struct drm_radeon_private *dev_priv = dev->dev_private;
2596         struct drm_radeon_cs *cs = data;
2597         struct drm_buf *buf;
2598         unsigned family;
2599         int l, r = 0;
2600         u32 *ib, cs_id = 0;
2601
2602         if (dev_priv == NULL) {
2603                 DRM_ERROR("called with no initialization\n");
2604                 return -EINVAL;
2605         }
2606         family = dev_priv->flags & RADEON_FAMILY_MASK;
2607         if (family < CHIP_R600) {
2608                 DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n");
2609                 return -EINVAL;
2610         }
2611         mutex_lock(&dev_priv->cs_mutex);
2612         /* get ib */
2613         r = r600_ib_get(dev, fpriv, &buf);
2614         if (r) {
2615                 DRM_ERROR("ib_get failed\n");
2616                 goto out;
2617         }
2618         ib = dev->agp_buffer_map->handle + buf->offset;
2619         /* now parse command stream */
2620         r = r600_cs_legacy(dev, data,  fpriv, family, ib, &l);
2621         if (r) {
2622                 goto out;
2623         }
2624
2625 out:
2626         r600_ib_free(dev, buf, fpriv, l, r);
2627         /* emit cs id sequence */
2628         r600_cs_id_emit(dev_priv, &cs_id);
2629         cs->cs_id = cs_id;
2630         mutex_unlock(&dev_priv->cs_mutex);
2631         return r;
2632 }
2633
2634 void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size)
2635 {
2636         struct drm_radeon_private *dev_priv = dev->dev_private;
2637
2638         *npipes = dev_priv->r600_npipes;
2639         *nbanks = dev_priv->r600_nbanks;
2640         *group_size = dev_priv->r600_group_size;
2641 }