GNU Linux-libre 4.14.266-gnu1
[releases.git] / drivers / mtd / nand / gpmi-nand / gpmi-lib.c
1 /*
2  * Freescale GPMI NAND Flash Driver
3  *
4  * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
5  * Copyright (C) 2008 Embedded Alley Solutions, Inc.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 #include <linux/delay.h>
22 #include <linux/clk.h>
23 #include <linux/slab.h>
24
25 #include "gpmi-nand.h"
26 #include "gpmi-regs.h"
27 #include "bch-regs.h"
28
29 static struct timing_threshold timing_default_threshold = {
30         .max_data_setup_cycles       = (BM_GPMI_TIMING0_DATA_SETUP >>
31                                                 BP_GPMI_TIMING0_DATA_SETUP),
32         .internal_data_setup_in_ns   = 0,
33         .max_sample_delay_factor     = (BM_GPMI_CTRL1_RDN_DELAY >>
34                                                 BP_GPMI_CTRL1_RDN_DELAY),
35         .max_dll_clock_period_in_ns  = 32,
36         .max_dll_delay_in_ns         = 16,
37 };
38
39 #define MXS_SET_ADDR            0x4
40 #define MXS_CLR_ADDR            0x8
41 /*
42  * Clear the bit and poll it cleared.  This is usually called with
43  * a reset address and mask being either SFTRST(bit 31) or CLKGATE
44  * (bit 30).
45  */
46 static int clear_poll_bit(void __iomem *addr, u32 mask)
47 {
48         int timeout = 0x400;
49
50         /* clear the bit */
51         writel(mask, addr + MXS_CLR_ADDR);
52
53         /*
54          * SFTRST needs 3 GPMI clocks to settle, the reference manual
55          * recommends to wait 1us.
56          */
57         udelay(1);
58
59         /* poll the bit becoming clear */
60         while ((readl(addr) & mask) && --timeout)
61                 /* nothing */;
62
63         return !timeout;
64 }
65
66 #define MODULE_CLKGATE          (1 << 30)
67 #define MODULE_SFTRST           (1 << 31)
68 /*
69  * The current mxs_reset_block() will do two things:
70  *  [1] enable the module.
71  *  [2] reset the module.
72  *
73  * In most of the cases, it's ok.
74  * But in MX23, there is a hardware bug in the BCH block (see erratum #2847).
75  * If you try to soft reset the BCH block, it becomes unusable until
76  * the next hard reset. This case occurs in the NAND boot mode. When the board
77  * boots by NAND, the ROM of the chip will initialize the BCH blocks itself.
78  * So If the driver tries to reset the BCH again, the BCH will not work anymore.
79  * You will see a DMA timeout in this case. The bug has been fixed
80  * in the following chips, such as MX28.
81  *
82  * To avoid this bug, just add a new parameter `just_enable` for
83  * the mxs_reset_block(), and rewrite it here.
84  */
85 static int gpmi_reset_block(void __iomem *reset_addr, bool just_enable)
86 {
87         int ret;
88         int timeout = 0x400;
89
90         /* clear and poll SFTRST */
91         ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
92         if (unlikely(ret))
93                 goto error;
94
95         /* clear CLKGATE */
96         writel(MODULE_CLKGATE, reset_addr + MXS_CLR_ADDR);
97
98         if (!just_enable) {
99                 /* set SFTRST to reset the block */
100                 writel(MODULE_SFTRST, reset_addr + MXS_SET_ADDR);
101                 udelay(1);
102
103                 /* poll CLKGATE becoming set */
104                 while ((!(readl(reset_addr) & MODULE_CLKGATE)) && --timeout)
105                         /* nothing */;
106                 if (unlikely(!timeout))
107                         goto error;
108         }
109
110         /* clear and poll SFTRST */
111         ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
112         if (unlikely(ret))
113                 goto error;
114
115         /* clear and poll CLKGATE */
116         ret = clear_poll_bit(reset_addr, MODULE_CLKGATE);
117         if (unlikely(ret))
118                 goto error;
119
120         return 0;
121
122 error:
123         pr_err("%s(%p): module reset timeout\n", __func__, reset_addr);
124         return -ETIMEDOUT;
125 }
126
127 static int __gpmi_enable_clk(struct gpmi_nand_data *this, bool v)
128 {
129         struct clk *clk;
130         int ret;
131         int i;
132
133         for (i = 0; i < GPMI_CLK_MAX; i++) {
134                 clk = this->resources.clock[i];
135                 if (!clk)
136                         break;
137
138                 if (v) {
139                         ret = clk_prepare_enable(clk);
140                         if (ret)
141                                 goto err_clk;
142                 } else {
143                         clk_disable_unprepare(clk);
144                 }
145         }
146         return 0;
147
148 err_clk:
149         for (; i > 0; i--)
150                 clk_disable_unprepare(this->resources.clock[i - 1]);
151         return ret;
152 }
153
154 #define gpmi_enable_clk(x) __gpmi_enable_clk(x, true)
155 #define gpmi_disable_clk(x) __gpmi_enable_clk(x, false)
156
157 int gpmi_init(struct gpmi_nand_data *this)
158 {
159         struct resources *r = &this->resources;
160         int ret;
161
162         ret = gpmi_enable_clk(this);
163         if (ret)
164                 return ret;
165         ret = gpmi_reset_block(r->gpmi_regs, false);
166         if (ret)
167                 goto err_out;
168
169         /*
170          * Reset BCH here, too. We got failures otherwise :(
171          * See later BCH reset for explanation of MX23 and MX28 handling
172          */
173         ret = gpmi_reset_block(r->bch_regs,
174                                GPMI_IS_MX23(this) || GPMI_IS_MX28(this));
175         if (ret)
176                 goto err_out;
177
178
179         /* Choose NAND mode. */
180         writel(BM_GPMI_CTRL1_GPMI_MODE, r->gpmi_regs + HW_GPMI_CTRL1_CLR);
181
182         /* Set the IRQ polarity. */
183         writel(BM_GPMI_CTRL1_ATA_IRQRDY_POLARITY,
184                                 r->gpmi_regs + HW_GPMI_CTRL1_SET);
185
186         /* Disable Write-Protection. */
187         writel(BM_GPMI_CTRL1_DEV_RESET, r->gpmi_regs + HW_GPMI_CTRL1_SET);
188
189         /* Select BCH ECC. */
190         writel(BM_GPMI_CTRL1_BCH_MODE, r->gpmi_regs + HW_GPMI_CTRL1_SET);
191
192         /*
193          * Decouple the chip select from dma channel. We use dma0 for all
194          * the chips.
195          */
196         writel(BM_GPMI_CTRL1_DECOUPLE_CS, r->gpmi_regs + HW_GPMI_CTRL1_SET);
197
198         gpmi_disable_clk(this);
199         return 0;
200 err_out:
201         gpmi_disable_clk(this);
202         return ret;
203 }
204
205 /* This function is very useful. It is called only when the bug occur. */
206 void gpmi_dump_info(struct gpmi_nand_data *this)
207 {
208         struct resources *r = &this->resources;
209         struct bch_geometry *geo = &this->bch_geometry;
210         u32 reg;
211         int i;
212
213         dev_err(this->dev, "Show GPMI registers :\n");
214         for (i = 0; i <= HW_GPMI_DEBUG / 0x10 + 1; i++) {
215                 reg = readl(r->gpmi_regs + i * 0x10);
216                 dev_err(this->dev, "offset 0x%.3x : 0x%.8x\n", i * 0x10, reg);
217         }
218
219         /* start to print out the BCH info */
220         dev_err(this->dev, "Show BCH registers :\n");
221         for (i = 0; i <= HW_BCH_VERSION / 0x10 + 1; i++) {
222                 reg = readl(r->bch_regs + i * 0x10);
223                 dev_err(this->dev, "offset 0x%.3x : 0x%.8x\n", i * 0x10, reg);
224         }
225         dev_err(this->dev, "BCH Geometry :\n"
226                 "GF length              : %u\n"
227                 "ECC Strength           : %u\n"
228                 "Page Size in Bytes     : %u\n"
229                 "Metadata Size in Bytes : %u\n"
230                 "ECC Chunk Size in Bytes: %u\n"
231                 "ECC Chunk Count        : %u\n"
232                 "Payload Size in Bytes  : %u\n"
233                 "Auxiliary Size in Bytes: %u\n"
234                 "Auxiliary Status Offset: %u\n"
235                 "Block Mark Byte Offset : %u\n"
236                 "Block Mark Bit Offset  : %u\n",
237                 geo->gf_len,
238                 geo->ecc_strength,
239                 geo->page_size,
240                 geo->metadata_size,
241                 geo->ecc_chunk_size,
242                 geo->ecc_chunk_count,
243                 geo->payload_size,
244                 geo->auxiliary_size,
245                 geo->auxiliary_status_offset,
246                 geo->block_mark_byte_offset,
247                 geo->block_mark_bit_offset);
248 }
249
250 /* Configures the geometry for BCH.  */
251 int bch_set_geometry(struct gpmi_nand_data *this)
252 {
253         struct resources *r = &this->resources;
254         struct bch_geometry *bch_geo = &this->bch_geometry;
255         unsigned int block_count;
256         unsigned int block_size;
257         unsigned int metadata_size;
258         unsigned int ecc_strength;
259         unsigned int page_size;
260         unsigned int gf_len;
261         int ret;
262
263         if (common_nfc_set_geometry(this))
264                 return !0;
265
266         block_count   = bch_geo->ecc_chunk_count - 1;
267         block_size    = bch_geo->ecc_chunk_size;
268         metadata_size = bch_geo->metadata_size;
269         ecc_strength  = bch_geo->ecc_strength >> 1;
270         page_size     = bch_geo->page_size;
271         gf_len        = bch_geo->gf_len;
272
273         ret = gpmi_enable_clk(this);
274         if (ret)
275                 return ret;
276
277         /*
278         * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this
279         * chip, otherwise it will lock up. So we skip resetting BCH on the MX23
280         * and MX28.
281         */
282         ret = gpmi_reset_block(r->bch_regs,
283                                GPMI_IS_MX23(this) || GPMI_IS_MX28(this));
284         if (ret)
285                 goto err_out;
286
287         /* Configure layout 0. */
288         writel(BF_BCH_FLASH0LAYOUT0_NBLOCKS(block_count)
289                         | BF_BCH_FLASH0LAYOUT0_META_SIZE(metadata_size)
290                         | BF_BCH_FLASH0LAYOUT0_ECC0(ecc_strength, this)
291                         | BF_BCH_FLASH0LAYOUT0_GF(gf_len, this)
292                         | BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(block_size, this),
293                         r->bch_regs + HW_BCH_FLASH0LAYOUT0);
294
295         writel(BF_BCH_FLASH0LAYOUT1_PAGE_SIZE(page_size)
296                         | BF_BCH_FLASH0LAYOUT1_ECCN(ecc_strength, this)
297                         | BF_BCH_FLASH0LAYOUT1_GF(gf_len, this)
298                         | BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this),
299                         r->bch_regs + HW_BCH_FLASH0LAYOUT1);
300
301         /* Set *all* chip selects to use layout 0. */
302         writel(0, r->bch_regs + HW_BCH_LAYOUTSELECT);
303
304         /* Enable interrupts. */
305         writel(BM_BCH_CTRL_COMPLETE_IRQ_EN,
306                                 r->bch_regs + HW_BCH_CTRL_SET);
307
308         gpmi_disable_clk(this);
309         return 0;
310 err_out:
311         gpmi_disable_clk(this);
312         return ret;
313 }
314
/*
 * Converts time in nanoseconds to cycles.
 *
 * @time:   duration to convert, in ns.
 * @period: length of one cycle, in ns; must be non-zero.
 * @min:    lower bound applied to the result.
 *
 * Returns @time divided by @period, rounded up, but never less than @min.
 * The rounding is done without adding to @time, so it cannot wrap around
 * for large inputs.
 */
static unsigned int ns_to_cycles(unsigned int time,
                        unsigned int period, unsigned int min)
{
        unsigned int cycles = time / period;

        /* Round up when @time is not a whole number of periods. */
        if (time % period)
                cycles++;

        return cycles > min ? cycles : min;
}
324
325 #define DEF_MIN_PROP_DELAY      5
326 #define DEF_MAX_PROP_DELAY      9
327 /* Apply timing to current hardware conditions. */
328 static int gpmi_nfc_compute_hardware_timing(struct gpmi_nand_data *this,
329                                         struct gpmi_nfc_hardware_timing *hw)
330 {
331         struct timing_threshold *nfc = &timing_default_threshold;
332         struct resources *r = &this->resources;
333         struct nand_chip *nand = &this->nand;
334         struct nand_timing target = this->timing;
335         bool improved_timing_is_available;
336         unsigned long clock_frequency_in_hz;
337         unsigned int clock_period_in_ns;
338         bool dll_use_half_periods;
339         unsigned int dll_delay_shift;
340         unsigned int max_sample_delay_in_ns;
341         unsigned int address_setup_in_cycles;
342         unsigned int data_setup_in_ns;
343         unsigned int data_setup_in_cycles;
344         unsigned int data_hold_in_cycles;
345         int ideal_sample_delay_in_ns;
346         unsigned int sample_delay_factor;
347         int tEYE;
348         unsigned int min_prop_delay_in_ns = DEF_MIN_PROP_DELAY;
349         unsigned int max_prop_delay_in_ns = DEF_MAX_PROP_DELAY;
350
351         /*
352          * If there are multiple chips, we need to relax the timings to allow
353          * for signal distortion due to higher capacitance.
354          */
355         if (nand->numchips > 2) {
356                 target.data_setup_in_ns    += 10;
357                 target.data_hold_in_ns     += 10;
358                 target.address_setup_in_ns += 10;
359         } else if (nand->numchips > 1) {
360                 target.data_setup_in_ns    += 5;
361                 target.data_hold_in_ns     += 5;
362                 target.address_setup_in_ns += 5;
363         }
364
365         /* Check if improved timing information is available. */
366         improved_timing_is_available =
367                 (target.tREA_in_ns  >= 0) &&
368                 (target.tRLOH_in_ns >= 0) &&
369                 (target.tRHOH_in_ns >= 0);
370
371         /* Inspect the clock. */
372         nfc->clock_frequency_in_hz = clk_get_rate(r->clock[0]);
373         clock_frequency_in_hz = nfc->clock_frequency_in_hz;
374         clock_period_in_ns    = NSEC_PER_SEC / clock_frequency_in_hz;
375
376         /*
377          * The NFC quantizes setup and hold parameters in terms of clock cycles.
378          * Here, we quantize the setup and hold timing parameters to the
379          * next-highest clock period to make sure we apply at least the
380          * specified times.
381          *
382          * For data setup and data hold, the hardware interprets a value of zero
383          * as the largest possible delay. This is not what's intended by a zero
384          * in the input parameter, so we impose a minimum of one cycle.
385          */
386         data_setup_in_cycles    = ns_to_cycles(target.data_setup_in_ns,
387                                                         clock_period_in_ns, 1);
388         data_hold_in_cycles     = ns_to_cycles(target.data_hold_in_ns,
389                                                         clock_period_in_ns, 1);
390         address_setup_in_cycles = ns_to_cycles(target.address_setup_in_ns,
391                                                         clock_period_in_ns, 0);
392
393         /*
394          * The clock's period affects the sample delay in a number of ways:
395          *
396          * (1) The NFC HAL tells us the maximum clock period the sample delay
397          *     DLL can tolerate. If the clock period is greater than half that
398          *     maximum, we must configure the DLL to be driven by half periods.
399          *
400          * (2) We need to convert from an ideal sample delay, in ns, to a
401          *     "sample delay factor," which the NFC uses. This factor depends on
402          *     whether we're driving the DLL with full or half periods.
403          *     Paraphrasing the reference manual:
404          *
405          *         AD = SDF x 0.125 x RP
406          *
407          * where:
408          *
409          *     AD   is the applied delay, in ns.
410          *     SDF  is the sample delay factor, which is dimensionless.
411          *     RP   is the reference period, in ns, which is a full clock period
412          *          if the DLL is being driven by full periods, or half that if
413          *          the DLL is being driven by half periods.
414          *
415          * Let's re-arrange this in a way that's more useful to us:
416          *
417          *                        8
418          *         SDF  =  AD x ----
419          *                       RP
420          *
421          * The reference period is either the clock period or half that, so this
422          * is:
423          *
424          *                        8       AD x DDF
425          *         SDF  =  AD x -----  =  --------
426          *                      f x P        P
427          *
428          * where:
429          *
430          *       f  is 1 or 1/2, depending on how we're driving the DLL.
431          *       P  is the clock period.
432          *     DDF  is the DLL Delay Factor, a dimensionless value that
433          *          incorporates all the constants in the conversion.
434          *
435          * DDF will be either 8 or 16, both of which are powers of two. We can
436          * reduce the cost of this conversion by using bit shifts instead of
437          * multiplication or division. Thus:
438          *
439          *                 AD << DDS
440          *         SDF  =  ---------
441          *                     P
442          *
443          *     or
444          *
445          *         AD  =  (SDF >> DDS) x P
446          *
447          * where:
448          *
449          *     DDS  is the DLL Delay Shift, the logarithm to base 2 of the DDF.
450          */
451         if (clock_period_in_ns > (nfc->max_dll_clock_period_in_ns >> 1)) {
452                 dll_use_half_periods = true;
453                 dll_delay_shift      = 3 + 1;
454         } else {
455                 dll_use_half_periods = false;
456                 dll_delay_shift      = 3;
457         }
458
459         /*
460          * Compute the maximum sample delay the NFC allows, under current
461          * conditions. If the clock is running too slowly, no sample delay is
462          * possible.
463          */
464         if (clock_period_in_ns > nfc->max_dll_clock_period_in_ns)
465                 max_sample_delay_in_ns = 0;
466         else {
467                 /*
468                  * Compute the delay implied by the largest sample delay factor
469                  * the NFC allows.
470                  */
471                 max_sample_delay_in_ns =
472                         (nfc->max_sample_delay_factor * clock_period_in_ns) >>
473                                                                 dll_delay_shift;
474
475                 /*
476                  * Check if the implied sample delay larger than the NFC
477                  * actually allows.
478                  */
479                 if (max_sample_delay_in_ns > nfc->max_dll_delay_in_ns)
480                         max_sample_delay_in_ns = nfc->max_dll_delay_in_ns;
481         }
482
483         /*
484          * Check if improved timing information is available. If not, we have to
485          * use a less-sophisticated algorithm.
486          */
487         if (!improved_timing_is_available) {
488                 /*
489                  * Fold the read setup time required by the NFC into the ideal
490                  * sample delay.
491                  */
492                 ideal_sample_delay_in_ns = target.gpmi_sample_delay_in_ns +
493                                                 nfc->internal_data_setup_in_ns;
494
495                 /*
496                  * The ideal sample delay may be greater than the maximum
497                  * allowed by the NFC. If so, we can trade off sample delay time
498                  * for more data setup time.
499                  *
500                  * In each iteration of the following loop, we add a cycle to
501                  * the data setup time and subtract a corresponding amount from
502                  * the sample delay until we've satisified the constraints or
503                  * can't do any better.
504                  */
505                 while ((ideal_sample_delay_in_ns > max_sample_delay_in_ns) &&
506                         (data_setup_in_cycles < nfc->max_data_setup_cycles)) {
507
508                         data_setup_in_cycles++;
509                         ideal_sample_delay_in_ns -= clock_period_in_ns;
510
511                         if (ideal_sample_delay_in_ns < 0)
512                                 ideal_sample_delay_in_ns = 0;
513
514                 }
515
516                 /*
517                  * Compute the sample delay factor that corresponds most closely
518                  * to the ideal sample delay. If the result is too large for the
519                  * NFC, use the maximum value.
520                  *
521                  * Notice that we use the ns_to_cycles function to compute the
522                  * sample delay factor. We do this because the form of the
523                  * computation is the same as that for calculating cycles.
524                  */
525                 sample_delay_factor =
526                         ns_to_cycles(
527                                 ideal_sample_delay_in_ns << dll_delay_shift,
528                                                         clock_period_in_ns, 0);
529
530                 if (sample_delay_factor > nfc->max_sample_delay_factor)
531                         sample_delay_factor = nfc->max_sample_delay_factor;
532
533                 /* Skip to the part where we return our results. */
534                 goto return_results;
535         }
536
537         /*
538          * If control arrives here, we have more detailed timing information,
539          * so we can use a better algorithm.
540          */
541
542         /*
543          * Fold the read setup time required by the NFC into the maximum
544          * propagation delay.
545          */
546         max_prop_delay_in_ns += nfc->internal_data_setup_in_ns;
547
548         /*
549          * Earlier, we computed the number of clock cycles required to satisfy
550          * the data setup time. Now, we need to know the actual nanoseconds.
551          */
552         data_setup_in_ns = clock_period_in_ns * data_setup_in_cycles;
553
554         /*
555          * Compute tEYE, the width of the data eye when reading from the NAND
556          * Flash. The eye width is fundamentally determined by the data setup
557          * time, perturbed by propagation delays and some characteristics of the
558          * NAND Flash device.
559          *
560          * start of the eye = max_prop_delay + tREA
561          * end of the eye   = min_prop_delay + tRHOH + data_setup
562          */
563         tEYE = (int)min_prop_delay_in_ns + (int)target.tRHOH_in_ns +
564                                                         (int)data_setup_in_ns;
565
566         tEYE -= (int)max_prop_delay_in_ns + (int)target.tREA_in_ns;
567
568         /*
569          * The eye must be open. If it's not, we can try to open it by
570          * increasing its main forcer, the data setup time.
571          *
572          * In each iteration of the following loop, we increase the data setup
573          * time by a single clock cycle. We do this until either the eye is
574          * open or we run into NFC limits.
575          */
576         while ((tEYE <= 0) &&
577                         (data_setup_in_cycles < nfc->max_data_setup_cycles)) {
578                 /* Give a cycle to data setup. */
579                 data_setup_in_cycles++;
580                 /* Synchronize the data setup time with the cycles. */
581                 data_setup_in_ns += clock_period_in_ns;
582                 /* Adjust tEYE accordingly. */
583                 tEYE += clock_period_in_ns;
584         }
585
586         /*
587          * When control arrives here, the eye is open. The ideal time to sample
588          * the data is in the center of the eye:
589          *
590          *     end of the eye + start of the eye
591          *     ---------------------------------  -  data_setup
592          *                    2
593          *
594          * After some algebra, this simplifies to the code immediately below.
595          */
596         ideal_sample_delay_in_ns =
597                 ((int)max_prop_delay_in_ns +
598                         (int)target.tREA_in_ns +
599                                 (int)min_prop_delay_in_ns +
600                                         (int)target.tRHOH_in_ns -
601                                                 (int)data_setup_in_ns) >> 1;
602
603         /*
604          * The following figure illustrates some aspects of a NAND Flash read:
605          *
606          *
607          *           __                   _____________________________________
608          * RDN         \_________________/
609          *
610          *                                         <---- tEYE ----->
611          *                                        /-----------------\
612          * Read Data ----------------------------<                   >---------
613          *                                        \-----------------/
614          *             ^                 ^                 ^              ^
615          *             |                 |                 |              |
616          *             |<--Data Setup -->|<--Delay Time -->|              |
617          *             |                 |                 |              |
618          *             |                 |                                |
619          *             |                 |<--   Quantized Delay Time   -->|
620          *             |                 |                                |
621          *
622          *
623          * We have some issues we must now address:
624          *
625          * (1) The *ideal* sample delay time must not be negative. If it is, we
626          *     jam it to zero.
627          *
628          * (2) The *ideal* sample delay time must not be greater than that
629          *     allowed by the NFC. If it is, we can increase the data setup
630          *     time, which will reduce the delay between the end of the data
631          *     setup and the center of the eye. It will also make the eye
632          *     larger, which might help with the next issue...
633          *
634          * (3) The *quantized* sample delay time must not fall either before the
635          *     eye opens or after it closes (the latter is the problem
636          *     illustrated in the above figure).
637          */
638
639         /* Jam a negative ideal sample delay to zero. */
640         if (ideal_sample_delay_in_ns < 0)
641                 ideal_sample_delay_in_ns = 0;
642
643         /*
644          * Extend the data setup as needed to reduce the ideal sample delay
645          * below the maximum permitted by the NFC.
646          */
647         while ((ideal_sample_delay_in_ns > max_sample_delay_in_ns) &&
648                         (data_setup_in_cycles < nfc->max_data_setup_cycles)) {
649
650                 /* Give a cycle to data setup. */
651                 data_setup_in_cycles++;
652                 /* Synchronize the data setup time with the cycles. */
653                 data_setup_in_ns += clock_period_in_ns;
654                 /* Adjust tEYE accordingly. */
655                 tEYE += clock_period_in_ns;
656
657                 /*
658                  * Decrease the ideal sample delay by one half cycle, to keep it
659                  * in the middle of the eye.
660                  */
661                 ideal_sample_delay_in_ns -= (clock_period_in_ns >> 1);
662
663                 /* Jam a negative ideal sample delay to zero. */
664                 if (ideal_sample_delay_in_ns < 0)
665                         ideal_sample_delay_in_ns = 0;
666         }
667
668         /*
669          * Compute the sample delay factor that corresponds to the ideal sample
670          * delay. If the result is too large, then use the maximum allowed
671          * value.
672          *
673          * Notice that we use the ns_to_cycles function to compute the sample
674          * delay factor. We do this because the form of the computation is the
675          * same as that for calculating cycles.
676          */
677         sample_delay_factor =
678                 ns_to_cycles(ideal_sample_delay_in_ns << dll_delay_shift,
679                                                         clock_period_in_ns, 0);
680
681         if (sample_delay_factor > nfc->max_sample_delay_factor)
682                 sample_delay_factor = nfc->max_sample_delay_factor;
683
684         /*
685          * These macros conveniently encapsulate a computation we'll use to
686          * continuously evaluate whether or not the data sample delay is inside
687          * the eye.
688          */
689         #define IDEAL_DELAY  ((int) ideal_sample_delay_in_ns)
690
691         #define QUANTIZED_DELAY  \
692                 ((int) ((sample_delay_factor * clock_period_in_ns) >> \
693                                                         dll_delay_shift))
694
695         #define DELAY_ERROR  (abs(QUANTIZED_DELAY - IDEAL_DELAY))
696
697         #define SAMPLE_IS_NOT_WITHIN_THE_EYE  (DELAY_ERROR > (tEYE >> 1))
698
699         /*
700          * While the quantized sample time falls outside the eye, reduce the
701          * sample delay or extend the data setup to move the sampling point back
702          * toward the eye. Do not allow the number of data setup cycles to
703          * exceed the maximum allowed by the NFC.
704          */
705         while (SAMPLE_IS_NOT_WITHIN_THE_EYE &&
706                         (data_setup_in_cycles < nfc->max_data_setup_cycles)) {
707                 /*
708                  * If control arrives here, the quantized sample delay falls
709                  * outside the eye. Check if it's before the eye opens, or after
710                  * the eye closes.
711                  */
712                 if (QUANTIZED_DELAY > IDEAL_DELAY) {
713                         /*
714                          * If control arrives here, the quantized sample delay
715                          * falls after the eye closes. Decrease the quantized
716                          * delay time and then go back to re-evaluate.
717                          */
718                         if (sample_delay_factor != 0)
719                                 sample_delay_factor--;
720                         continue;
721                 }
722
723                 /*
724                  * If control arrives here, the quantized sample delay falls
725                  * before the eye opens. Shift the sample point by increasing
726                  * data setup time. This will also make the eye larger.
727                  */
728
729                 /* Give a cycle to data setup. */
730                 data_setup_in_cycles++;
731                 /* Synchronize the data setup time with the cycles. */
732                 data_setup_in_ns += clock_period_in_ns;
733                 /* Adjust tEYE accordingly. */
734                 tEYE += clock_period_in_ns;
735
736                 /*
737                  * Decrease the ideal sample delay by one half cycle, to keep it
738                  * in the middle of the eye.
739                  */
740                 ideal_sample_delay_in_ns -= (clock_period_in_ns >> 1);
741
742                 /* ...and one less period for the delay time. */
743                 ideal_sample_delay_in_ns -= clock_period_in_ns;
744
745                 /* Jam a negative ideal sample delay to zero. */
746                 if (ideal_sample_delay_in_ns < 0)
747                         ideal_sample_delay_in_ns = 0;
748
749                 /*
750                  * We have a new ideal sample delay, so re-compute the quantized
751                  * delay.
752                  */
753                 sample_delay_factor =
754                         ns_to_cycles(
755                                 ideal_sample_delay_in_ns << dll_delay_shift,
756                                                         clock_period_in_ns, 0);
757
758                 if (sample_delay_factor > nfc->max_sample_delay_factor)
759                         sample_delay_factor = nfc->max_sample_delay_factor;
760         }
761
762         /* Control arrives here when we're ready to return our results. */
763 return_results:
764         hw->data_setup_in_cycles    = data_setup_in_cycles;
765         hw->data_hold_in_cycles     = data_hold_in_cycles;
766         hw->address_setup_in_cycles = address_setup_in_cycles;
767         hw->use_half_periods        = dll_use_half_periods;
768         hw->sample_delay_factor     = sample_delay_factor;
769         hw->device_busy_timeout     = GPMI_DEFAULT_BUSY_TIMEOUT;
770         hw->wrn_dly_sel             = BV_GPMI_CTRL1_WRN_DLY_SEL_4_TO_8NS;
771
772         /* Return success. */
773         return 0;
774 }
775
776 /*
777  * <1> Firstly, we should know what's the GPMI-clock means.
778  *     The GPMI-clock is the internal clock in the gpmi nand controller.
779  *     If you set 100MHz to gpmi nand controller, the GPMI-clock's period
780  *     is 10ns. Mark the GPMI-clock's period as GPMI-clock-period.
781  *
782  * <2> Secondly, we should know what's the frequency on the nand chip pins.
783  *     The frequency on the nand chip pins is derived from the GPMI-clock.
784  *     We can get it from the following equation:
785  *
786  *         F = G / (DS + DH)
787  *
788  *         F  : the frequency on the nand chip pins.
789  *         G  : the GPMI clock, such as 100MHz.
790  *         DS : GPMI_HW_GPMI_TIMING0:DATA_SETUP
791  *         DH : GPMI_HW_GPMI_TIMING0:DATA_HOLD
792  *
793  * <3> Thirdly, when the frequency on the nand chip pins is above 33MHz,
794  *     the nand EDO(extended Data Out) timing could be applied.
795  *     The GPMI implements a feedback read strobe to sample the read data.
796  *     The feedback read strobe can be delayed to support the nand EDO timing
797  *     where the read strobe may deasserts before the read data is valid, and
798  *     read data is valid for some time after read strobe.
799  *
800  *     The following figure illustrates some aspects of a NAND Flash read:
801  *
802  *                   |<---tREA---->|
803  *                   |             |
804  *                   |         |   |
805  *                   |<--tRP-->|   |
806  *                   |         |   |
807  *                  __          ___|__________________________________
808  *     RDN            \________/   |
809  *                                 |
810  *                                 /---------\
811  *     Read Data    --------------<           >---------
812  *                                 \---------/
813  *                                |     |
814  *                                |<-D->|
815  *     FeedbackRDN  ________             ____________
816  *                          \___________/
817  *
818  *          D stands for delay, set in the HW_GPMI_CTRL1:RDN_DELAY.
819  *
820  *
821  * <4> Now, we begin to describe how to compute the right RDN_DELAY.
822  *
823  *  4.1) From the aspect of the nand chip pins:
824  *        Delay = (tREA + C - tRP)               {1}
825  *
826  *        tREA : the maximum read access time. From the ONFI nand standards,
827  *               we know that tREA is 16ns in mode 5, tREA is 20ns is mode 4.
828  *               Please check it in : www.onfi.org
829  *        C    : a constant for adjust the delay. default is 4.
830  *        tRP  : the read pulse width.
831  *               Specified by the HW_GPMI_TIMING0:DATA_SETUP:
832  *                    tRP = (GPMI-clock-period) * DATA_SETUP
833  *
834  *  4.2) From the aspect of the GPMI nand controller:
835  *         Delay = RDN_DELAY * 0.125 * RP        {2}
836  *
837  *         RP   : the DLL reference period.
838  *            if (GPMI-clock-period > DLL_THRETHOLD)
839  *                   RP = GPMI-clock-period / 2;
840  *            else
841  *                   RP = GPMI-clock-period;
842  *
843  *            Set the HW_GPMI_CTRL1:HALF_PERIOD if GPMI-clock-period
844  *            is greater DLL_THRETHOLD. In other SOCs, the DLL_THRETHOLD
845  *            is 16ns, but in mx6q, we use 12ns.
846  *
847  *  4.3) since {1} equals {2}, we get:
848  *
849  *                    (tREA + 4 - tRP) * 8
850  *         RDN_DELAY = ---------------------     {3}
851  *                           RP
852  *
853  *  4.4) We only support the fastest asynchronous mode of ONFI nand.
854  *       For some ONFI nand, the mode 4 is the fastest mode;
855  *       while for some ONFI nand, the mode 5 is the fastest mode.
856  *       So we only support the mode 4 and mode 5. It is no need to
857  *       support other modes.
858  */
859 static void gpmi_compute_edo_timing(struct gpmi_nand_data *this,
860                         struct gpmi_nfc_hardware_timing *hw)
861 {
862         struct resources *r = &this->resources;
863         unsigned long rate = clk_get_rate(r->clock[0]);
864         int mode = this->timing_mode;
865         int dll_threshold = this->devdata->max_chain_delay;
866         unsigned long delay;
867         unsigned long clk_period;
868         int t_rea;
869         int c = 4;
870         int t_rp;
871         int rp;
872
873         /*
874          * [1] for GPMI_HW_GPMI_TIMING0:
875          *     The async mode requires 40MHz for mode 4, 50MHz for mode 5.
876          *     The GPMI can support 100MHz at most. So if we want to
877          *     get the 40MHz or 50MHz, we have to set DS=1, DH=1.
878          *     Set the ADDRESS_SETUP to 0 in mode 4.
879          */
880         hw->data_setup_in_cycles = 1;
881         hw->data_hold_in_cycles = 1;
882         hw->address_setup_in_cycles = ((mode == 5) ? 1 : 0);
883
884         /* [2] for GPMI_HW_GPMI_TIMING1 */
885         hw->device_busy_timeout = 0x9000;
886
887         /* [3] for GPMI_HW_GPMI_CTRL1 */
888         hw->wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY;
889
890         /*
891          * Enlarge 10 times for the numerator and denominator in {3}.
892          * This make us to get more accurate result.
893          */
894         clk_period = NSEC_PER_SEC / (rate / 10);
895         dll_threshold *= 10;
896         t_rea = ((mode == 5) ? 16 : 20) * 10;
897         c *= 10;
898
899         t_rp = clk_period * 1; /* DATA_SETUP is 1 */
900
901         if (clk_period > dll_threshold) {
902                 hw->use_half_periods = 1;
903                 rp = clk_period / 2;
904         } else {
905                 hw->use_half_periods = 0;
906                 rp = clk_period;
907         }
908
909         /*
910          * Multiply the numerator with 10, we could do a round off:
911          *      7.8 round up to 8; 7.4 round down to 7.
912          */
913         delay  = (((t_rea + c - t_rp) * 8) * 10) / rp;
914         delay = (delay + 5) / 10;
915
916         hw->sample_delay_factor = delay;
917 }
918
919 static int enable_edo_mode(struct gpmi_nand_data *this, int mode)
920 {
921         struct resources  *r = &this->resources;
922         struct nand_chip *nand = &this->nand;
923         struct mtd_info  *mtd = nand_to_mtd(nand);
924         uint8_t *feature;
925         unsigned long rate;
926         int ret;
927
928         feature = kzalloc(ONFI_SUBFEATURE_PARAM_LEN, GFP_KERNEL);
929         if (!feature)
930                 return -ENOMEM;
931
932         nand->select_chip(mtd, 0);
933
934         /* [1] send SET FEATURE command to NAND */
935         feature[0] = mode;
936         ret = nand->onfi_set_features(mtd, nand,
937                                 ONFI_FEATURE_ADDR_TIMING_MODE, feature);
938         if (ret)
939                 goto err_out;
940
941         /* [2] send GET FEATURE command to double-check the timing mode */
942         memset(feature, 0, ONFI_SUBFEATURE_PARAM_LEN);
943         ret = nand->onfi_get_features(mtd, nand,
944                                 ONFI_FEATURE_ADDR_TIMING_MODE, feature);
945         if (ret || feature[0] != mode)
946                 goto err_out;
947
948         nand->select_chip(mtd, -1);
949
950         /* [3] set the main IO clock, 100MHz for mode 5, 80MHz for mode 4. */
951         rate = (mode == 5) ? 100000000 : 80000000;
952         clk_set_rate(r->clock[0], rate);
953
954         /* Let the gpmi_begin() re-compute the timing again. */
955         this->flags &= ~GPMI_TIMING_INIT_OK;
956
957         this->flags |= GPMI_ASYNC_EDO_ENABLED;
958         this->timing_mode = mode;
959         kfree(feature);
960         dev_info(this->dev, "enable the asynchronous EDO mode %d\n", mode);
961         return 0;
962
963 err_out:
964         nand->select_chip(mtd, -1);
965         kfree(feature);
966         dev_err(this->dev, "mode:%d ,failed in set feature.\n", mode);
967         return -EINVAL;
968 }
969
970 int gpmi_extra_init(struct gpmi_nand_data *this)
971 {
972         struct nand_chip *chip = &this->nand;
973
974         /* Enable the asynchronous EDO feature. */
975         if (GPMI_IS_MX6(this) && chip->onfi_version) {
976                 int mode = onfi_get_async_timing_mode(chip);
977
978                 /* We only support the timing mode 4 and mode 5. */
979                 if (mode & ONFI_TIMING_MODE_5)
980                         mode = 5;
981                 else if (mode & ONFI_TIMING_MODE_4)
982                         mode = 4;
983                 else
984                         return 0;
985
986                 return enable_edo_mode(this, mode);
987         }
988         return 0;
989 }
990
991 /* Begin the I/O */
992 void gpmi_begin(struct gpmi_nand_data *this)
993 {
994         struct resources *r = &this->resources;
995         void __iomem *gpmi_regs = r->gpmi_regs;
996         unsigned int   clock_period_in_ns;
997         uint32_t       reg;
998         unsigned int   dll_wait_time_in_us;
999         struct gpmi_nfc_hardware_timing  hw;
1000         int ret;
1001
1002         /* Enable the clock. */
1003         ret = gpmi_enable_clk(this);
1004         if (ret) {
1005                 dev_err(this->dev, "We failed in enable the clk\n");
1006                 goto err_out;
1007         }
1008
1009         /* Only initialize the timing once */
1010         if (this->flags & GPMI_TIMING_INIT_OK)
1011                 return;
1012         this->flags |= GPMI_TIMING_INIT_OK;
1013
1014         if (this->flags & GPMI_ASYNC_EDO_ENABLED)
1015                 gpmi_compute_edo_timing(this, &hw);
1016         else
1017                 gpmi_nfc_compute_hardware_timing(this, &hw);
1018
1019         /* [1] Set HW_GPMI_TIMING0 */
1020         reg = BF_GPMI_TIMING0_ADDRESS_SETUP(hw.address_setup_in_cycles) |
1021                 BF_GPMI_TIMING0_DATA_HOLD(hw.data_hold_in_cycles)         |
1022                 BF_GPMI_TIMING0_DATA_SETUP(hw.data_setup_in_cycles);
1023
1024         writel(reg, gpmi_regs + HW_GPMI_TIMING0);
1025
1026         /* [2] Set HW_GPMI_TIMING1 */
1027         writel(BF_GPMI_TIMING1_BUSY_TIMEOUT(hw.device_busy_timeout),
1028                 gpmi_regs + HW_GPMI_TIMING1);
1029
1030         /* [3] The following code is to set the HW_GPMI_CTRL1. */
1031
1032         /* Set the WRN_DLY_SEL */
1033         writel(BM_GPMI_CTRL1_WRN_DLY_SEL, gpmi_regs + HW_GPMI_CTRL1_CLR);
1034         writel(BF_GPMI_CTRL1_WRN_DLY_SEL(hw.wrn_dly_sel),
1035                                         gpmi_regs + HW_GPMI_CTRL1_SET);
1036
1037         /* DLL_ENABLE must be set to 0 when setting RDN_DELAY or HALF_PERIOD. */
1038         writel(BM_GPMI_CTRL1_DLL_ENABLE, gpmi_regs + HW_GPMI_CTRL1_CLR);
1039
1040         /* Clear out the DLL control fields. */
1041         reg = BM_GPMI_CTRL1_RDN_DELAY | BM_GPMI_CTRL1_HALF_PERIOD;
1042         writel(reg, gpmi_regs + HW_GPMI_CTRL1_CLR);
1043
1044         /* If no sample delay is called for, return immediately. */
1045         if (!hw.sample_delay_factor)
1046                 return;
1047
1048         /* Set RDN_DELAY or HALF_PERIOD. */
1049         reg = ((hw.use_half_periods) ? BM_GPMI_CTRL1_HALF_PERIOD : 0)
1050                 | BF_GPMI_CTRL1_RDN_DELAY(hw.sample_delay_factor);
1051
1052         writel(reg, gpmi_regs + HW_GPMI_CTRL1_SET);
1053
1054         /* At last, we enable the DLL. */
1055         writel(BM_GPMI_CTRL1_DLL_ENABLE, gpmi_regs + HW_GPMI_CTRL1_SET);
1056
1057         /*
1058          * After we enable the GPMI DLL, we have to wait 64 clock cycles before
1059          * we can use the GPMI. Calculate the amount of time we need to wait,
1060          * in microseconds.
1061          */
1062         clock_period_in_ns = NSEC_PER_SEC / clk_get_rate(r->clock[0]);
1063         dll_wait_time_in_us = (clock_period_in_ns * 64) / 1000;
1064
1065         if (!dll_wait_time_in_us)
1066                 dll_wait_time_in_us = 1;
1067
1068         /* Wait for the DLL to settle. */
1069         udelay(dll_wait_time_in_us);
1070
1071 err_out:
1072         return;
1073 }
1074
/* End the I/O: disable the clock that gpmi_begin() enabled. */
void gpmi_end(struct gpmi_nand_data *this)
{
	gpmi_disable_clk(this);
}
1079
1080 /* Clears a BCH interrupt. */
1081 void gpmi_clear_bch(struct gpmi_nand_data *this)
1082 {
1083         struct resources *r = &this->resources;
1084         writel(BM_BCH_CTRL_COMPLETE_IRQ, r->bch_regs + HW_BCH_CTRL_CLR);
1085 }
1086
1087 /* Returns the Ready/Busy status of the given chip. */
1088 int gpmi_is_ready(struct gpmi_nand_data *this, unsigned chip)
1089 {
1090         struct resources *r = &this->resources;
1091         uint32_t mask = 0;
1092         uint32_t reg = 0;
1093
1094         if (GPMI_IS_MX23(this)) {
1095                 mask = MX23_BM_GPMI_DEBUG_READY0 << chip;
1096                 reg = readl(r->gpmi_regs + HW_GPMI_DEBUG);
1097         } else if (GPMI_IS_MX28(this) || GPMI_IS_MX6(this)) {
1098                 /*
1099                  * In the imx6, all the ready/busy pins are bound
1100                  * together. So we only need to check chip 0.
1101                  */
1102                 if (GPMI_IS_MX6(this))
1103                         chip = 0;
1104
1105                 /* MX28 shares the same R/B register as MX6Q. */
1106                 mask = MX28_BF_GPMI_STAT_READY_BUSY(1 << chip);
1107                 reg = readl(r->gpmi_regs + HW_GPMI_STAT);
1108         } else
1109                 dev_err(this->dev, "unknown arch.\n");
1110         return reg & mask;
1111 }
1112
1113 static inline void set_dma_type(struct gpmi_nand_data *this,
1114                                         enum dma_ops_type type)
1115 {
1116         this->last_dma_type = this->dma_type;
1117         this->dma_type = type;
1118 }
1119
1120 int gpmi_send_command(struct gpmi_nand_data *this)
1121 {
1122         struct dma_chan *channel = get_dma_chan(this);
1123         struct dma_async_tx_descriptor *desc;
1124         struct scatterlist *sgl;
1125         int chip = this->current_chip;
1126         u32 pio[3];
1127
1128         /* [1] send out the PIO words */
1129         pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(BV_GPMI_CTRL0_COMMAND_MODE__WRITE)
1130                 | BM_GPMI_CTRL0_WORD_LENGTH
1131                 | BF_GPMI_CTRL0_CS(chip, this)
1132                 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this)
1133                 | BF_GPMI_CTRL0_ADDRESS(BV_GPMI_CTRL0_ADDRESS__NAND_CLE)
1134                 | BM_GPMI_CTRL0_ADDRESS_INCREMENT
1135                 | BF_GPMI_CTRL0_XFER_COUNT(this->command_length);
1136         pio[1] = pio[2] = 0;
1137         desc = dmaengine_prep_slave_sg(channel,
1138                                         (struct scatterlist *)pio,
1139                                         ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
1140         if (!desc)
1141                 return -EINVAL;
1142
1143         /* [2] send out the COMMAND + ADDRESS string stored in @buffer */
1144         sgl = &this->cmd_sgl;
1145
1146         sg_init_one(sgl, this->cmd_buffer, this->command_length);
1147         dma_map_sg(this->dev, sgl, 1, DMA_TO_DEVICE);
1148         desc = dmaengine_prep_slave_sg(channel,
1149                                 sgl, 1, DMA_MEM_TO_DEV,
1150                                 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1151         if (!desc)
1152                 return -EINVAL;
1153
1154         /* [3] submit the DMA */
1155         set_dma_type(this, DMA_FOR_COMMAND);
1156         return start_dma_without_bch_irq(this, desc);
1157 }
1158
1159 int gpmi_send_data(struct gpmi_nand_data *this)
1160 {
1161         struct dma_async_tx_descriptor *desc;
1162         struct dma_chan *channel = get_dma_chan(this);
1163         int chip = this->current_chip;
1164         uint32_t command_mode;
1165         uint32_t address;
1166         u32 pio[2];
1167
1168         /* [1] PIO */
1169         command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WRITE;
1170         address      = BV_GPMI_CTRL0_ADDRESS__NAND_DATA;
1171
1172         pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode)
1173                 | BM_GPMI_CTRL0_WORD_LENGTH
1174                 | BF_GPMI_CTRL0_CS(chip, this)
1175                 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this)
1176                 | BF_GPMI_CTRL0_ADDRESS(address)
1177                 | BF_GPMI_CTRL0_XFER_COUNT(this->upper_len);
1178         pio[1] = 0;
1179         desc = dmaengine_prep_slave_sg(channel, (struct scatterlist *)pio,
1180                                         ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
1181         if (!desc)
1182                 return -EINVAL;
1183
1184         /* [2] send DMA request */
1185         prepare_data_dma(this, DMA_TO_DEVICE);
1186         desc = dmaengine_prep_slave_sg(channel, &this->data_sgl,
1187                                         1, DMA_MEM_TO_DEV,
1188                                         DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1189         if (!desc)
1190                 return -EINVAL;
1191
1192         /* [3] submit the DMA */
1193         set_dma_type(this, DMA_FOR_WRITE_DATA);
1194         return start_dma_without_bch_irq(this, desc);
1195 }
1196
1197 int gpmi_read_data(struct gpmi_nand_data *this)
1198 {
1199         struct dma_async_tx_descriptor *desc;
1200         struct dma_chan *channel = get_dma_chan(this);
1201         int chip = this->current_chip;
1202         u32 pio[2];
1203
1204         /* [1] : send PIO */
1205         pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(BV_GPMI_CTRL0_COMMAND_MODE__READ)
1206                 | BM_GPMI_CTRL0_WORD_LENGTH
1207                 | BF_GPMI_CTRL0_CS(chip, this)
1208                 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this)
1209                 | BF_GPMI_CTRL0_ADDRESS(BV_GPMI_CTRL0_ADDRESS__NAND_DATA)
1210                 | BF_GPMI_CTRL0_XFER_COUNT(this->upper_len);
1211         pio[1] = 0;
1212         desc = dmaengine_prep_slave_sg(channel,
1213                                         (struct scatterlist *)pio,
1214                                         ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
1215         if (!desc)
1216                 return -EINVAL;
1217
1218         /* [2] : send DMA request */
1219         prepare_data_dma(this, DMA_FROM_DEVICE);
1220         desc = dmaengine_prep_slave_sg(channel, &this->data_sgl,
1221                                         1, DMA_DEV_TO_MEM,
1222                                         DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1223         if (!desc)
1224                 return -EINVAL;
1225
1226         /* [3] : submit the DMA */
1227         set_dma_type(this, DMA_FOR_READ_DATA);
1228         return start_dma_without_bch_irq(this, desc);
1229 }
1230
1231 int gpmi_send_page(struct gpmi_nand_data *this,
1232                         dma_addr_t payload, dma_addr_t auxiliary)
1233 {
1234         struct bch_geometry *geo = &this->bch_geometry;
1235         uint32_t command_mode;
1236         uint32_t address;
1237         uint32_t ecc_command;
1238         uint32_t buffer_mask;
1239         struct dma_async_tx_descriptor *desc;
1240         struct dma_chan *channel = get_dma_chan(this);
1241         int chip = this->current_chip;
1242         u32 pio[6];
1243
1244         /* A DMA descriptor that does an ECC page read. */
1245         command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WRITE;
1246         address      = BV_GPMI_CTRL0_ADDRESS__NAND_DATA;
1247         ecc_command  = BV_GPMI_ECCCTRL_ECC_CMD__BCH_ENCODE;
1248         buffer_mask  = BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_PAGE |
1249                                 BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_AUXONLY;
1250
1251         pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode)
1252                 | BM_GPMI_CTRL0_WORD_LENGTH
1253                 | BF_GPMI_CTRL0_CS(chip, this)
1254                 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this)
1255                 | BF_GPMI_CTRL0_ADDRESS(address)
1256                 | BF_GPMI_CTRL0_XFER_COUNT(0);
1257         pio[1] = 0;
1258         pio[2] = BM_GPMI_ECCCTRL_ENABLE_ECC
1259                 | BF_GPMI_ECCCTRL_ECC_CMD(ecc_command)
1260                 | BF_GPMI_ECCCTRL_BUFFER_MASK(buffer_mask);
1261         pio[3] = geo->page_size;
1262         pio[4] = payload;
1263         pio[5] = auxiliary;
1264
1265         desc = dmaengine_prep_slave_sg(channel,
1266                                         (struct scatterlist *)pio,
1267                                         ARRAY_SIZE(pio), DMA_TRANS_NONE,
1268                                         DMA_CTRL_ACK);
1269         if (!desc)
1270                 return -EINVAL;
1271
1272         set_dma_type(this, DMA_FOR_WRITE_ECC_PAGE);
1273         return start_dma_with_bch_irq(this, desc);
1274 }
1275
1276 int gpmi_read_page(struct gpmi_nand_data *this,
1277                                 dma_addr_t payload, dma_addr_t auxiliary)
1278 {
1279         struct bch_geometry *geo = &this->bch_geometry;
1280         uint32_t command_mode;
1281         uint32_t address;
1282         uint32_t ecc_command;
1283         uint32_t buffer_mask;
1284         struct dma_async_tx_descriptor *desc;
1285         struct dma_chan *channel = get_dma_chan(this);
1286         int chip = this->current_chip;
1287         u32 pio[6];
1288
1289         /* [1] Wait for the chip to report ready. */
1290         command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WAIT_FOR_READY;
1291         address      = BV_GPMI_CTRL0_ADDRESS__NAND_DATA;
1292
1293         pio[0] =  BF_GPMI_CTRL0_COMMAND_MODE(command_mode)
1294                 | BM_GPMI_CTRL0_WORD_LENGTH
1295                 | BF_GPMI_CTRL0_CS(chip, this)
1296                 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this)
1297                 | BF_GPMI_CTRL0_ADDRESS(address)
1298                 | BF_GPMI_CTRL0_XFER_COUNT(0);
1299         pio[1] = 0;
1300         desc = dmaengine_prep_slave_sg(channel,
1301                                 (struct scatterlist *)pio, 2,
1302                                 DMA_TRANS_NONE, 0);
1303         if (!desc)
1304                 return -EINVAL;
1305
1306         /* [2] Enable the BCH block and read. */
1307         command_mode = BV_GPMI_CTRL0_COMMAND_MODE__READ;
1308         address      = BV_GPMI_CTRL0_ADDRESS__NAND_DATA;
1309         ecc_command  = BV_GPMI_ECCCTRL_ECC_CMD__BCH_DECODE;
1310         buffer_mask  = BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_PAGE
1311                         | BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_AUXONLY;
1312
1313         pio[0] =  BF_GPMI_CTRL0_COMMAND_MODE(command_mode)
1314                 | BM_GPMI_CTRL0_WORD_LENGTH
1315                 | BF_GPMI_CTRL0_CS(chip, this)
1316                 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this)
1317                 | BF_GPMI_CTRL0_ADDRESS(address)
1318                 | BF_GPMI_CTRL0_XFER_COUNT(geo->page_size);
1319
1320         pio[1] = 0;
1321         pio[2] =  BM_GPMI_ECCCTRL_ENABLE_ECC
1322                 | BF_GPMI_ECCCTRL_ECC_CMD(ecc_command)
1323                 | BF_GPMI_ECCCTRL_BUFFER_MASK(buffer_mask);
1324         pio[3] = geo->page_size;
1325         pio[4] = payload;
1326         pio[5] = auxiliary;
1327         desc = dmaengine_prep_slave_sg(channel,
1328                                         (struct scatterlist *)pio,
1329                                         ARRAY_SIZE(pio), DMA_TRANS_NONE,
1330                                         DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1331         if (!desc)
1332                 return -EINVAL;
1333
1334         /* [3] Disable the BCH block */
1335         command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WAIT_FOR_READY;
1336         address      = BV_GPMI_CTRL0_ADDRESS__NAND_DATA;
1337
1338         pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode)
1339                 | BM_GPMI_CTRL0_WORD_LENGTH
1340                 | BF_GPMI_CTRL0_CS(chip, this)
1341                 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this)
1342                 | BF_GPMI_CTRL0_ADDRESS(address)
1343                 | BF_GPMI_CTRL0_XFER_COUNT(geo->page_size);
1344         pio[1] = 0;
1345         pio[2] = 0; /* clear GPMI_HW_GPMI_ECCCTRL, disable the BCH. */
1346         desc = dmaengine_prep_slave_sg(channel,
1347                                 (struct scatterlist *)pio, 3,
1348                                 DMA_TRANS_NONE,
1349                                 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1350         if (!desc)
1351                 return -EINVAL;
1352
1353         /* [4] submit the DMA */
1354         set_dma_type(this, DMA_FOR_READ_ECC_PAGE);
1355         return start_dma_with_bch_irq(this, desc);
1356 }
1357
1358 /**
1359  * gpmi_copy_bits - copy bits from one memory region to another
1360  * @dst: destination buffer
1361  * @dst_bit_off: bit offset we're starting to write at
1362  * @src: source buffer
1363  * @src_bit_off: bit offset we're starting to read from
1364  * @nbits: number of bits to copy
1365  *
1366  * This functions copies bits from one memory region to another, and is used by
1367  * the GPMI driver to copy ECC sections which are not guaranteed to be byte
1368  * aligned.
1369  *
1370  * src and dst should not overlap.
1371  *
1372  */
1373 void gpmi_copy_bits(u8 *dst, size_t dst_bit_off,
1374                     const u8 *src, size_t src_bit_off,
1375                     size_t nbits)
1376 {
1377         size_t i;
1378         size_t nbytes;
1379         u32 src_buffer = 0;
1380         size_t bits_in_src_buffer = 0;
1381
1382         if (!nbits)
1383                 return;
1384
1385         /*
1386          * Move src and dst pointers to the closest byte pointer and store bit
1387          * offsets within a byte.
1388          */
1389         src += src_bit_off / 8;
1390         src_bit_off %= 8;
1391
1392         dst += dst_bit_off / 8;
1393         dst_bit_off %= 8;
1394
1395         /*
1396          * Initialize the src_buffer value with bits available in the first
1397          * byte of data so that we end up with a byte aligned src pointer.
1398          */
1399         if (src_bit_off) {
1400                 src_buffer = src[0] >> src_bit_off;
1401                 if (nbits >= (8 - src_bit_off)) {
1402                         bits_in_src_buffer += 8 - src_bit_off;
1403                 } else {
1404                         src_buffer &= GENMASK(nbits - 1, 0);
1405                         bits_in_src_buffer += nbits;
1406                 }
1407                 nbits -= bits_in_src_buffer;
1408                 src++;
1409         }
1410
1411         /* Calculate the number of bytes that can be copied from src to dst. */
1412         nbytes = nbits / 8;
1413
1414         /* Try to align dst to a byte boundary. */
1415         if (dst_bit_off) {
1416                 if (bits_in_src_buffer < (8 - dst_bit_off) && nbytes) {
1417                         src_buffer |= src[0] << bits_in_src_buffer;
1418                         bits_in_src_buffer += 8;
1419                         src++;
1420                         nbytes--;
1421                 }
1422
1423                 if (bits_in_src_buffer >= (8 - dst_bit_off)) {
1424                         dst[0] &= GENMASK(dst_bit_off - 1, 0);
1425                         dst[0] |= src_buffer << dst_bit_off;
1426                         src_buffer >>= (8 - dst_bit_off);
1427                         bits_in_src_buffer -= (8 - dst_bit_off);
1428                         dst_bit_off = 0;
1429                         dst++;
1430                         if (bits_in_src_buffer > 7) {
1431                                 bits_in_src_buffer -= 8;
1432                                 dst[0] = src_buffer;
1433                                 dst++;
1434                                 src_buffer >>= 8;
1435                         }
1436                 }
1437         }
1438
1439         if (!bits_in_src_buffer && !dst_bit_off) {
1440                 /*
1441                  * Both src and dst pointers are byte aligned, thus we can
1442                  * just use the optimized memcpy function.
1443                  */
1444                 if (nbytes)
1445                         memcpy(dst, src, nbytes);
1446         } else {
1447                 /*
1448                  * src buffer is not byte aligned, hence we have to copy each
1449                  * src byte to the src_buffer variable before extracting a byte
1450                  * to store in dst.
1451                  */
1452                 for (i = 0; i < nbytes; i++) {
1453                         src_buffer |= src[i] << bits_in_src_buffer;
1454                         dst[i] = src_buffer;
1455                         src_buffer >>= 8;
1456                 }
1457         }
1458         /* Update dst and src pointers */
1459         dst += nbytes;
1460         src += nbytes;
1461
1462         /*
1463          * nbits is the number of remaining bits. It should not exceed 8 as
1464          * we've already copied as much bytes as possible.
1465          */
1466         nbits %= 8;
1467
1468         /*
1469          * If there's no more bits to copy to the destination and src buffer
1470          * was already byte aligned, then we're done.
1471          */
1472         if (!nbits && !bits_in_src_buffer)
1473                 return;
1474
1475         /* Copy the remaining bits to src_buffer */
1476         if (nbits)
1477                 src_buffer |= (*src & GENMASK(nbits - 1, 0)) <<
1478                               bits_in_src_buffer;
1479         bits_in_src_buffer += nbits;
1480
1481         /*
1482          * In case there were not enough bits to get a byte aligned dst buffer
1483          * prepare the src_buffer variable to match the dst organization (shift
1484          * src_buffer by dst_bit_off and retrieve the least significant bits
1485          * from dst).
1486          */
1487         if (dst_bit_off)
1488                 src_buffer = (src_buffer << dst_bit_off) |
1489                              (*dst & GENMASK(dst_bit_off - 1, 0));
1490         bits_in_src_buffer += dst_bit_off;
1491
1492         /*
1493          * Keep most significant bits from dst if we end up with an unaligned
1494          * number of bits.
1495          */
1496         nbytes = bits_in_src_buffer / 8;
1497         if (bits_in_src_buffer % 8) {
1498                 src_buffer |= (dst[nbytes] &
1499                                GENMASK(7, bits_in_src_buffer % 8)) <<
1500                               (nbytes * 8);
1501                 nbytes++;
1502         }
1503
1504         /* Copy the remaining bytes to dst */
1505         for (i = 0; i < nbytes; i++) {
1506                 dst[i] = src_buffer;
1507                 src_buffer >>= 8;
1508         }
1509 }