/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier:    BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include "sequencer.h"
#include "sequencer_auto.h"
#include "sequencer_auto_ac_init.h"
#include "sequencer_auto_inst_init.h"
#include "sequencer_defines.h"

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
        (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);

static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
        (struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);

static struct socfpga_sdr_reg_file *sdr_reg_file =
        (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;

static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
        (struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);

static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
        (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;

static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
        (struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);

static struct socfpga_data_mgr *data_mgr =
        (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;

static struct socfpga_sdr_ctrl *sdr_ctrl =
        (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

#define DELTA_D         1
/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode,
 * where we skip everything except the bare minimum, we need a few of the
 * steps to be dynamic. In those cases, we either use the DYNAMIC_CALIB_STEPS
 * for the check, which is based on the rtl-supplied value, or we dynamically
 * compute the value to use based on the dynamically-chosen calibration mode.
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
        STATIC_SKIP_DELAY_LOOPS)

/* calibration steps requested by the rtl */
uint16_t dyn_calib_steps;

/*
 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option instead of
 * static, we use boolean logic to select between non-skip and skip values.
 *
 * The mask is set to include all bits when not skipping, but is zero when
 * skipping.
 */

uint16_t skip_delay_mask;       /* mask off bits when skipping/not-skipping */

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
        ((non_skip_value) & skip_delay_mask)
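/*
 * For illustration: with skip_delay_mask == 0xffff,
 * SKIP_DELAY_LOOP_VALUE_OR_ZERO(0x6a) evaluates to 0x6a and the delay
 * loops run normally; with skip_delay_mask == 0, it evaluates to 0, so
 * the RW manager counters are loaded with zero and each delay loop
 * collapses to a single pass.
 */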

struct gbl_type *gbl;
struct param_type *param;
uint32_t curr_shadow_reg;

static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
        uint32_t write_group, uint32_t use_dm,
        uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks);

static void set_failing_group_stage(uint32_t group, uint32_t stage,
        uint32_t substage)
{
        /*
         * Only set the global stage if there has not been any other
         * failing group.
         */
        if (gbl->error_stage == CAL_STAGE_NIL) {
                gbl->error_substage = substage;
                gbl->error_stage = stage;
                gbl->error_group = group;
        }
}

static void reg_file_set_group(u16 set_group)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
}

static void reg_file_set_stage(u8 set_stage)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
}

static void reg_file_set_sub_stage(u8 set_sub_stage)
{
        set_sub_stage &= 0xff;
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
}

static void initialize(void)
{
        debug("%s:%d\n", __func__, __LINE__);
        /*
         * Calibration has control over the path to memory.
         * In Hard PHY this is a 2-bit control:
         * 0: AFI Mux Select
         * 1: DDIO Mux Select
         */
        writel(0x3, &phy_mgr_cfg->mux_sel);

        /* The memory clock is not stable yet; we begin initialization. */
        writel(0, &phy_mgr_cfg->reset_mem_stbl);

        /* Calibration status: all set to zero. */
        writel(0, &phy_mgr_cfg->cal_status);

        writel(0, &phy_mgr_cfg->cal_debug_info);

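        /*
         * Worked example (widths assumed for illustration, not taken from
         * the generated sequencer_defines.h): with
         * RW_MGR_MEM_DQ_PER_READ_DQS == 8 and
         * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS == 2, read_correct_mask
         * becomes (1 << 8) - 1 == 0xff (one bit per DQ pin) and
         * read_correct_mask_vg becomes (1 << 4) - 1 == 0xf (one bit per
         * DQ pin of a single virtual group).
         */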
        if ((dyn_calib_steps & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) {
                param->read_correct_mask_vg  = ((uint32_t)1 <<
                        (RW_MGR_MEM_DQ_PER_READ_DQS /
                        RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
                param->write_correct_mask_vg = ((uint32_t)1 <<
                        (RW_MGR_MEM_DQ_PER_READ_DQS /
                        RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
                param->read_correct_mask     = ((uint32_t)1 <<
                        RW_MGR_MEM_DQ_PER_READ_DQS) - 1;
                param->write_correct_mask    = ((uint32_t)1 <<
                        RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1;
                param->dm_correct_mask       = ((uint32_t)1 <<
                        (RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH))
                        - 1;
        }
}

static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode)
{
        uint32_t odt_mask_0 = 0;
        uint32_t odt_mask_1 = 0;
        uint32_t cs_and_odt_mask;

        if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) {
                if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) {
                        /*
                         * 1 Rank
                         * Read: ODT = 0
                         * Write: ODT = 1
                         */
                        odt_mask_0 = 0x0;
                        odt_mask_1 = 0x1;
                } else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) {
                        /* 2 Ranks */
                        if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) {
                                /*
                                 * - Dual-Slot, Single-Rank
                                 *   (1 chip-select per DIMM),
                                 *   or
                                 * - RDIMM, 4 total CS (2 CS per DIMM),
                                 *   which means 2 DIMMs.
                                 * Since MEM_NUMBER_OF_RANKS is 2, they are
                                 * both single rank with 2 CS each
                                 * (special case for RDIMM).
                                 * Read: Turn on ODT on the opposite rank.
                                 * Write: Turn on ODT on all ranks.
                                 */
                                odt_mask_0 = 0x3 & ~(1 << rank);
                                odt_mask_1 = 0x3;
                        } else {
                                /*
                                 * Single-Slot, Dual-Rank DIMMs
                                 * (2 chip-selects per DIMM)
                                 * Read: Turn off ODT on all ranks.
                                 * Write: Turn on ODT on the active rank.
                                 */
                                odt_mask_0 = 0x0;
                                odt_mask_1 = 0x3 & (1 << rank);
                        }
                } else {
                        /*
                         * 4 Ranks
                         * Read:
                         * ----------+-----------------------+
                         *           |                       |
                         *           |         ODT           |
                         * Read From +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  0  |
                         *     1     |  1  |  0  |  0  |  0  |
                         *     2     |  0  |  0  |  0  |  1  |
                         *     3     |  0  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *
                         * Write:
                         * ----------+-----------------------+
                         *           |                       |
                         *           |         ODT           |
                         * Write To  +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  1  |
                         *     1     |  1  |  0  |  1  |  0  |
                         *     2     |  0  |  1  |  0  |  1  |
                         *     3     |  1  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         */
                        switch (rank) {
                        case 0:
                                odt_mask_0 = 0x4;
                                odt_mask_1 = 0x5;
                                break;
                        case 1:
                                odt_mask_0 = 0x8;
                                odt_mask_1 = 0xA;
                                break;
                        case 2:
                                odt_mask_0 = 0x1;
                                odt_mask_1 = 0x5;
                                break;
                        case 3:
                                odt_mask_0 = 0x2;
                                odt_mask_1 = 0xA;
                                break;
                        }
                }
        } else {
                odt_mask_0 = 0x0;
                odt_mask_1 = 0x0;
        }

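        /*
         * Example (4-rank configuration, writing to rank 1; for
         * illustration only): odt_mask_0 == 0x8, odt_mask_1 == 0xA, so
         * cs_and_odt_mask == (0xFF & ~0x02) | (0x8 << 8) | (0xA << 16)
         * == 0x0A08FD. The low byte carries the chip-select mask (all
         * bits set except the addressed rank), the next two bytes the
         * read and write ODT masks.
         */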
        cs_and_odt_mask =
                (0xFF & ~(1 << rank)) |
                ((0xFF & odt_mask_0) << 8) |
                ((0xFF & odt_mask_1) << 16);
        writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}

/**
 * scc_mgr_set() - Set SCC Manager register
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register.
 */
static void scc_mgr_set(u32 off, u32 grp, u32 val)
{
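        /*
         * The per-group registers are laid out at a 4-byte stride, hence
         * the group index is shifted left by two and merged into the
         * register address selected by @off.
         */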
        writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
}

/**
 * scc_mgr_initialize() - Initialize SCC Manager registers
 *
 * Initialize SCC Manager registers.
 */
static void scc_mgr_initialize(void)
{
        /*
         * Clear register file for HPS. 16 (2^4) is the size of the
         * full register file in the scc mgr:
         *      RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
         *                             MEM_IF_READ_DQS_WIDTH - 1);
         */
        int i;

        for (i = 0; i < 16; i++) {
                debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
                           __func__, __LINE__, i);
                scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i);
        }
}

static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase)
{
        scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
}

static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase)
{
        scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
}

static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_io_in_delay(uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
}

static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_out1_delay(uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
}

static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
                    RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm,
                    delay);
}

/* Load up DQS config settings. */
static void scc_mgr_load_dqs(uint32_t dqs)
{
        writel(dqs, &sdr_scc_mgr->dqs_ena);
}

/* Load up DQS IO config settings. */
static void scc_mgr_load_dqs_io(void)
{
        writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* Load up DQ config settings. */
static void scc_mgr_load_dq(uint32_t dq_in_group)
{
        writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* Load up DM config settings. */
static void scc_mgr_load_dm(uint32_t dm)
{
        writel(dm, &sdr_scc_mgr->dm_ena);
}

/**
 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 * @update:     If non-zero, trigger SCC Manager update for all ranks
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register
 * and optionally triggers the SCC update for all ranks.
 */
static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
                                  const int update)
{
        u32 r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_set(off, grp, val);

                if (update || (r == 0)) {
                        writel(grp, &sdr_scc_mgr->dqs_ena);
                        writel(0, &sdr_scc_mgr->update);
                }
        }
}

static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
{
        /*
         * Although the h/w doesn't support different phases per shadow
         * register, for simplicity our SCC manager modeling keeps different
         * phase settings per shadow reg, and it's important for us to keep
         * them in sync to match h/w. For efficiency, the scan chain update
         * should occur only once to sr0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
                              read_group, phase, 0);
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group,
                                                     uint32_t phase)
{
        /*
         * Although the h/w doesn't support different phases per shadow
         * register, for simplicity our SCC manager modeling keeps different
         * phase settings per shadow reg, and it's important for us to keep
         * them in sync to match h/w. For efficiency, the scan chain update
         * should occur only once to sr0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
                              write_group, phase, 0);
}

static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
                                               uint32_t delay)
{
        /*
         * In shadow register mode, the T11 settings are stored in
         * registers in the core, which are updated by the DQS_ENA
         * signals. Not issuing the SCC_MGR_UPD command allows us to
         * save lots of rank switching overhead, by calling
         * select_shadow_regs_for_update with update_scan_chains
         * set to 0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
                              read_group, delay, 1);
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_set_oct_out1_delay() - Set OCT output delay
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * This function sets the OCT output delay in the SCC manager.
 */
static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
{
        const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
                          RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
        const int base = write_group * ratio;
        int i;
        /*
         * Load the setting in the SCC manager.
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
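        /*
         * Example (widths assumed for illustration): with
         * RW_MGR_MEM_IF_READ_DQS_WIDTH == 8 and
         * RW_MGR_MEM_IF_WRITE_DQS_WIDTH == 4, ratio == 2, so write group 3
         * covers read groups 6 and 7 and the delay is written to both.
         */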
        for (i = 0; i < ratio; i++)
                scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
}

/**
 * scc_mgr_set_hhp_extras() - Set HHP extras.
 *
 * Load the fixed setting in the SCC manager HHP extras.
 */
static void scc_mgr_set_hhp_extras(void)
{
        /*
         * Load the fixed setting in the SCC manager:
         * bits: 0:0 = 1'b1     - DQS bypass
         * bits: 1:1 = 1'b1     - DQ bypass
         * bits: 4:2 = 3'b001   - rfifo_mode
         * bits: 6:5 = 2'b01    - rfifo clock_select
         * bits: 7:7 = 1'b0     - separate gating from ungating setting
         * bits: 8:8 = 1'b0     - separate OE from Output delay setting
         */
        const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
                          (1 << 2) | (1 << 1) | (1 << 0);
        const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
                         SCC_MGR_HHP_GLOBALS_OFFSET |
                         SCC_MGR_HHP_EXTRAS_OFFSET;

        debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n",
                   __func__, __LINE__);
        writel(value, addr);
        debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
                   __func__, __LINE__);
}

/**
 * scc_mgr_zero_all() - Zero all DQS config
 *
 * Zero all DQS config.
 */
static void scc_mgr_zero_all(void)
{
        int i, r;

        /*
         * Zero all DQS config settings, across all groups and all
         * shadow registers.
         */
        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
                        /*
                         * The phases actually don't exist on a per-rank basis,
                         * but there's no harm updating them several times, so
                         * let's keep the code simple.
                         */
                        scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE);
                        scc_mgr_set_dqs_en_phase(i, 0);
                        scc_mgr_set_dqs_en_delay(i, 0);
                }

                for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
                        scc_mgr_set_dqdqs_output_phase(i, 0);
                        /* Arria V/Cyclone V don't have out2. */
                        scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
                }
        }

        /* Multicast to all DQS group enables. */
        writel(0xff, &sdr_scc_mgr->dqs_ena);
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
 * @write_group:        Write group
 *
 * Set bypass mode and trigger SCC update.
 */
static void scc_set_bypass_mode(const u32 write_group)
{
        /* Multicast to all DQ enables. */
        writel(0xff, &sdr_scc_mgr->dq_ena);
        writel(0xff, &sdr_scc_mgr->dm_ena);

        /* Update current DQS IO enable. */
        writel(0, &sdr_scc_mgr->dqs_io_ena);

        /* Update the DQS logic. */
        writel(write_group, &sdr_scc_mgr->dqs_ena);

        /* Hit update. */
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
 * @write_group:        Write group
 *
 * Load DQS settings for Write Group, do not trigger SCC update.
 */
static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
{
        const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
                          RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
        const int base = write_group * ratio;
        int i;
        /*
         * Load the setting in the SCC manager.
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
        for (i = 0; i < ratio; i++)
                writel(base + i, &sdr_scc_mgr->dqs_ena);
}

/**
 * scc_mgr_zero_group() - Zero all configs for a group
 * @write_group:        Write group
 * @out_only:           If non-zero, only zero the output-side settings
 *
 * Zero DQ, DM, DQS and OCT configs for a group.
 */
static void scc_mgr_zero_group(const u32 write_group, const int out_only)
{
        int i, r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                /* Zero all DQ config settings. */
                for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
                        scc_mgr_set_dq_out1_delay(i, 0);
                        if (!out_only)
                                scc_mgr_set_dq_in_delay(i, 0);
                }

                /* Multicast to all DQ enables. */
                writel(0xff, &sdr_scc_mgr->dq_ena);

                /* Zero all DM config settings. */
                for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                        scc_mgr_set_dm_out1_delay(i, 0);

                /* Multicast to all DM enables. */
                writel(0xff, &sdr_scc_mgr->dm_ena);

                /* Zero all DQS IO settings. */
                if (!out_only)
                        scc_mgr_set_dqs_io_in_delay(0);

                /* Arria V/Cyclone V don't have out2. */
                scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE);
                scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
                scc_mgr_load_dqs_for_write_group(write_group);

                /* Multicast to all DQS IO enables (only 1 in total). */
                writel(0, &sdr_scc_mgr->dqs_io_ena);

                /* Hit update to zero everything. */
                writel(0, &sdr_scc_mgr->update);
        }
}

/*
 * Apply and load a particular input delay for the DQ pins in a group.
 * group_bgn is the index of the first DQ pin (in the write group).
 */
static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay)
{
        uint32_t i, p;

        for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
                scc_mgr_set_dq_in_delay(p, delay);
                scc_mgr_load_dq(p);
        }
}

/**
 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
 * @delay:              Delay value
 *
 * Apply and load a particular output delay for the DQ pins in a group.
 */
static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
{
        int i;

        for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
                scc_mgr_set_dq_out1_delay(i, delay);
                scc_mgr_load_dq(i);
        }
}

/* Apply and load a particular output delay for the DM pins in a group. */
static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1)
{
        uint32_t i;

        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                scc_mgr_set_dm_out1_delay(i, delay1);
                scc_mgr_load_dm(i);
        }
}

/* Apply and load delay on both DQS and OCT out1. */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
                                                    uint32_t delay)
{
        scc_mgr_set_dqs_out1_delay(delay);
        scc_mgr_load_dqs_io();

        scc_mgr_set_oct_out1_delay(write_group, delay);
        scc_mgr_load_dqs_for_write_group(write_group);
}

/* Apply a delay to the entire output side: DQ, DM, DQS, OCT. */
static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
                                                  const u32 group_bgn,
                                                  const u32 delay)
{
        u32 i, new_delay;

        /* DQ shift */
        for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++)
                scc_mgr_load_dq(i);

        /* DM shift */
        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                scc_mgr_load_dm(i);

        /* DQS shift */
        new_delay = READ_SCC_DQS_IO_OUT2_DELAY;
        new_delay += delay;

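        /*
         * If the requested delay overflows the OUT2 chain, the remainder
         * is spilled into OUT1. For illustration (values assumed): with
         * OUT2 currently at 10, delay == 15 and IO_IO_OUT2_DELAY_MAX ==
         * 16, new_delay == 25, so OUT1 absorbs 25 - 16 == 9 taps and
         * OUT2 saturates at 16.
         */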
        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                debug_cond(DLEVEL == 1,
                           "%s:%d (%u, %u, %u) DQS: %u > %d => %d; adding %u to OUT1\n",
                           __func__, __LINE__,
                           write_group, group_bgn, delay, new_delay,
                           IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
                scc_mgr_set_dqs_out1_delay(new_delay -
                                           IO_IO_OUT2_DELAY_MAX);
                new_delay = IO_IO_OUT2_DELAY_MAX;
        }

        scc_mgr_load_dqs_io();

        /* OCT shift */
        new_delay = READ_SCC_OCT_OUT2_DELAY;
        new_delay += delay;

        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                debug_cond(DLEVEL == 1,
                           "%s:%d (%u, %u, %u) OCT: %u > %d => %d; adding %u to OUT1\n",
                           __func__, __LINE__,
                           write_group, group_bgn, delay, new_delay,
                           IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
                scc_mgr_set_oct_out1_delay(write_group, new_delay -
                                           IO_IO_OUT2_DELAY_MAX);
                new_delay = IO_IO_OUT2_DELAY_MAX;
        }

        scc_mgr_load_dqs_for_write_group(write_group);
}

/*
 * Apply a delay to the entire output side (DQ, DM, DQS, OCT)
 * and to all ranks.
 */
static void scc_mgr_apply_group_all_out_delay_add_all_ranks(
        uint32_t write_group, uint32_t group_bgn, uint32_t delay)
{
        uint32_t r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
                r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_apply_group_all_out_delay_add(write_group,
                                                      group_bgn, delay);
                writel(0, &sdr_scc_mgr->update);
        }
}

/*
 * Optimization used to recover some slots in the ddr3 inst_rom;
 * could be applied to other protocols if we wanted to.
 */
static void set_jump_as_return(void)
{
        /*
         * To save space, we replace return with a jump to a special shared
         * RETURN instruction, and set the counter to a large value so that
         * we always jump.
         */
        writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
        writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}

/*
 * Should always use constants as arguments to ensure all computations are
 * performed at compile time.
 */
static void delay_for_n_mem_clocks(const uint32_t clocks)
{
        uint32_t afi_clocks;
        uint8_t inner = 0;
        uint8_t outer = 0;
        uint16_t c_loop = 0;

        debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);

        /* Scale (rounding up) to get afi clocks. */
        afi_clocks = (clocks + AFI_RATE_RATIO - 1) / AFI_RATE_RATIO;

        /*
         * Note, we don't bother accounting for being off a little bit
         * because of a few extra instructions in outer loops.
         * Note, the loops have a test at the end, and do the test before
         * the decrement, and so always perform the loop
         * 1 time more than the counter value.
         */
        if (afi_clocks == 0) {
                ;
        } else if (afi_clocks <= 0x100) {
                inner = afi_clocks - 1;
                outer = 0;
                c_loop = 0;
        } else if (afi_clocks <= 0x10000) {
                inner = 0xff;
                outer = (afi_clocks - 1) >> 8;
                c_loop = 0;
        } else {
                inner = 0xff;
                outer = 0xff;
                c_loop = (afi_clocks - 1) >> 16;
        }
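        /*
         * Worked example (AFI_RATE_RATIO == 2 assumed for illustration):
         * clocks == 54000 gives afi_clocks == 27000. Since 0x100 <
         * 27000 <= 0x10000, inner == 0xff and outer == 26999 >> 8 == 105,
         * i.e. roughly 106 outer iterations of 256 inner iterations each.
         */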

        /*
         * The ROM instructions are structured as follows:
         *
         *    IDLE_LOOP2: jnz cntr0, TARGET_A
         *    IDLE_LOOP1: jnz cntr1, TARGET_B
         *                return
         *
         * So, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
         * TARGET_B is set to IDLE_LOOP2 as well.
         *
         * If we have no outer loop, though, then we can use IDLE_LOOP1 only,
         * set TARGET_B to IDLE_LOOP1, and skip IDLE_LOOP2 entirely.
         *
         * A little confusing, but it helps save precious space in the
         * inst_rom and sequencer rom, keeps the delays more accurate,
         * and reduces overhead.
         */
        if (afi_clocks <= 0x100) {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                        &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_IDLE_LOOP1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                          RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        } else {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                        &sdr_rw_load_mgr_regs->load_cntr0);

                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
                        &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_IDLE_LOOP2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(RW_MGR_IDLE_LOOP2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                /* Hack to get around the compiler not being smart enough. */
                if (afi_clocks <= 0x10000) {
                        /* Only need to run once. */
                        writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                                  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
                } else {
                        do {
                                writel(RW_MGR_IDLE_LOOP2,
                                        SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                        RW_MGR_RUN_SINGLE_GROUP_OFFSET);
                        } while (c_loop-- != 0);
                }
        }
        debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}

static void rw_mgr_mem_initialize(void)
{
        uint32_t r;
        uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                           RW_MGR_RUN_SINGLE_GROUP_OFFSET;

        debug("%s:%d\n", __func__, __LINE__);

        /* The reset / CKE part of initialization is broadcast to all ranks. */
        writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);

        /*
         * Here's how you load a register for a loop:
         * counters are located @ 0x800,
         * jump addresses are located @ 0xC00.
         * For both, registers 0 to 3 are selected using bits 3 and 2, like
         * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C.
         * I know this ain't pretty, but the Avalon bus throws away the 2
         * least significant bits.
         */

        /* Start with memory RESET activated. */

        /* tINIT = 200us */

        /*
         * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles.
         * If a and b are the number of iterations in 2 nested loops,
         * it takes the following number of cycles to complete the operation:
         * number_of_cycles = ((2 + n) * a + 2) * b,
         * where n is the number of instructions in the inner loop.
         * One possible solution is n = 0, a = 256, b = 106 => a = FF,
         * b = 6A.
         */
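        /*
         * Arithmetic check: with n = 0, a = 256, b = 106 the formula gives
         * ((2 + 0) * 256 + 2) * 106 = 514 * 106 = 54484 cycles, i.e. just
         * over the ~54000 cycles (200us @ 3.75ns) required.
         */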

        /* Load counters */
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL),
               &sdr_rw_load_mgr_regs->load_cntr0);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL),
               &sdr_rw_load_mgr_regs->load_cntr1);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL),
               &sdr_rw_load_mgr_regs->load_cntr2);

        /* Load jump address */
        writel(RW_MGR_INIT_RESET_0_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add0);
        writel(RW_MGR_INIT_RESET_0_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        writel(RW_MGR_INIT_RESET_0_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add2);

        /* Execute count instruction */
        writel(RW_MGR_INIT_RESET_0_CKE_0, grpaddr);

        /* Indicate that memory is stable. */
        writel(1, &phy_mgr_cfg->reset_mem_stbl);

        /*
         * Transition the RESET to high.
         * Wait for 500us.
         */

        /*
         * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles.
         * If a and b are the number of iterations in 2 nested loops,
         * it takes the following number of cycles to complete the operation:
         * number_of_cycles = ((2 + n) * a + 2) * b,
         * where n is the number of instructions in the inner loop.
         * One possible solution is n = 2, a = 131, b = 256 => a = 83,
         * b = FF.
         */
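        /*
         * Arithmetic check: with n = 2, a = 131, b = 256 the formula gives
         * ((2 + 2) * 131 + 2) * 256 = 526 * 256 = 134656 cycles, i.e. just
         * over the ~134000 cycles (500us @ 3.75ns) required.
         */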

        /* Load counters */
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL),
               &sdr_rw_load_mgr_regs->load_cntr0);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL),
               &sdr_rw_load_mgr_regs->load_cntr1);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL),
               &sdr_rw_load_mgr_regs->load_cntr2);

        /* Load jump address */
        writel(RW_MGR_INIT_RESET_1_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add0);
        writel(RW_MGR_INIT_RESET_1_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        writel(RW_MGR_INIT_RESET_1_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add2);

        writel(RW_MGR_INIT_RESET_1_CKE_0, grpaddr);

        /* Bring up clock enable. */

        /* tXRP < 250 ck cycles */
        delay_for_n_mem_clocks(250);

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
                if (param->skip_ranks[r]) {
                        /* Request to skip the rank. */
                        continue;
                }

                /* Set rank. */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

                /*
                 * Use mirrored commands for odd ranks if address
                 * mirroring is on.
                 */
                if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_DLL_RESET_MIRR, grpaddr);
                } else {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1, grpaddr);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_DLL_RESET, grpaddr);
                }
                set_jump_as_return();
                writel(RW_MGR_ZQCL, grpaddr);

                /* tZQinit = tDLLK = 512 ck cycles */
                delay_for_n_mem_clocks(512);
        }
}

/*
 * At the end of calibration we have to program the user settings in
 * and hand off the memory to the user.
 */
static void rw_mgr_mem_handoff(void)
{
        uint32_t r;
        uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                           RW_MGR_RUN_SINGLE_GROUP_OFFSET;

        debug("%s:%d\n", __func__, __LINE__);
        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
                if (param->skip_ranks[r])
                        /* Request to skip the rank. */
                        continue;
                /* Set rank. */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

                /* Precharge all banks. */
                writel(RW_MGR_PRECHARGE_ALL, grpaddr);

                /* Load up MR settings specified by user. */

                /*
                 * Use mirrored commands for odd ranks if address
                 * mirroring is on.
                 */
                if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_USER_MIRR, grpaddr);
                } else {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_USER, grpaddr);
                }
                /*
                 * Need to wait tMOD (12CK or 15ns) time before issuing other
                 * commands, but we will have plenty of NIOS cycles before
                 * actual handoff, so it's okay.
                 */
        }
}

/*
 * Performs a guaranteed read on the patterns we are going to use during a
 * read test to ensure memory works.
 */
static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn,
        uint32_t group, uint32_t num_tries, uint32_t *bit_chk,
        uint32_t all_ranks)
{
        uint32_t r, vg;
        uint32_t correct_mask_vg;
        uint32_t tmp_bit_chk;
        uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        uint32_t addr;
        uint32_t base_rw_mgr;

        *bit_chk = param->read_correct_mask;
        correct_mask_vg = param->read_correct_mask_vg;

        for (r = rank_bgn; r < rank_end; r++) {
                if (param->skip_ranks[r])
                        /* Request to skip the rank. */
                        continue;

                /* Set rank. */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up a constant burst of read commands. */
                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
                writel(RW_MGR_GUARANTEED_READ,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
                writel(RW_MGR_GUARANTEED_READ_CONT,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                tmp_bit_chk = 0;
                for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
                        /* Reset the FIFOs to get pointers to known state. */
                        writel(0, &phy_mgr_cmd->fifo_reset);
                        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                  RW_MGR_RESET_READ_DATAPATH_OFFSET);

                        tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
                                / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);

                        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                               RW_MGR_RUN_SINGLE_GROUP_OFFSET;
                        writel(RW_MGR_GUARANTEED_READ, addr +
                               ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
                                vg) << 2));

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~base_rw_mgr);

                        if (vg == 0)
                                break;
                }
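                /*
                 * Each pass of the vg loop shifts tmp_bit_chk up by the
                 * number of DQ pins per virtual group and ORs in one
                 * pass/fail bit per pin, so after the loop tmp_bit_chk
                 * holds one bit for every DQ pin of the read group; the
                 * AND below then clears *bit_chk bits that failed on any
                 * rank.
                 */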
                *bit_chk &= tmp_bit_chk;
        }

        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
        debug_cond(DLEVEL == 1,
                   "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %lu\n",
                   __func__, __LINE__, group, *bit_chk,
                   param->read_correct_mask,
                   (long unsigned int)(*bit_chk == param->read_correct_mask));
        return *bit_chk == param->read_correct_mask;
}

static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks
        (uint32_t group, uint32_t num_tries, uint32_t *bit_chk)
{
        return rw_mgr_mem_calibrate_read_test_patterns(0, group,
                num_tries, bit_chk, 1);
}

/* Load up the patterns we are going to use during a read test. */
static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn,
        uint32_t all_ranks)
{
        uint32_t r;
        uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);

        debug("%s:%d\n", __func__, __LINE__);
        for (r = rank_bgn; r < rank_end; r++) {
                if (param->skip_ranks[r])
                        /* Request to skip the rank. */
                        continue;

                /* Set rank. */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up a constant burst. */
                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT0,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT3,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);

                writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                                RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        }

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
}

/*
 * Try a read and see if it returns correct data back. Has dummy reads
 * inserted into the mix used to align DQS enable. Has more thorough checks
 * than the regular read test.
 */
static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group,
        uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
        uint32_t all_groups, uint32_t all_ranks)
{
        uint32_t r, vg;
        uint32_t correct_mask_vg;
        uint32_t tmp_bit_chk;
        uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        uint32_t addr;
        uint32_t base_rw_mgr;
        uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) &
                CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION);

        *bit_chk = param->read_correct_mask;
        correct_mask_vg = param->read_correct_mask_vg;

        for (r = rank_bgn; r < rank_end; r++) {
                if (param->skip_ranks[r])
                        /* Request to skip the rank. */
                        continue;

                /* Set rank. */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_READ_B2B_WAIT1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
                writel(RW_MGR_READ_B2B_WAIT2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                if (quick_read_mode)
                        /* Need at least two (1 + 1) reads to capture failures. */
                        writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
                else if (all_groups)
                        writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
                else
                        writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);

                writel(RW_MGR_READ_B2B,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
                if (all_groups)
                        writel(RW_MGR_MEM_IF_READ_DQS_WIDTH *
                               RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1,
                               &sdr_rw_load_mgr_regs->load_cntr3);
                else
                        writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);

                writel(RW_MGR_READ_B2B,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);

                tmp_bit_chk = 0;
                for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
                        /* Reset the FIFOs to get pointers to known state. */
                        writel(0, &phy_mgr_cmd->fifo_reset);
                        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                  RW_MGR_RESET_READ_DATAPATH_OFFSET);

                        tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
                                / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);

                        if (all_groups)
                                addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                       RW_MGR_RUN_ALL_GROUPS_OFFSET;
                        else
                                addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                       RW_MGR_RUN_SINGLE_GROUP_OFFSET;

                        writel(RW_MGR_READ_B2B, addr +
                               ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
                               vg) << 2));

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~base_rw_mgr);

                        if (vg == 0)
                                break;
                }
                *bit_chk &= tmp_bit_chk;
        }

        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

        if (all_correct) {
                set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
                debug_cond(DLEVEL == 2,
                           "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %lu\n",
                           __func__, __LINE__, group, all_groups, *bit_chk,
                           param->read_correct_mask,
                           (long unsigned int)(*bit_chk ==
                           param->read_correct_mask));
                return *bit_chk == param->read_correct_mask;
        } else {
                set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
                debug_cond(DLEVEL == 2,
                           "%s:%d read_test(%u,ONE,%u) => (%u != %lu) => %lu\n",
                           __func__, __LINE__, group, all_groups, *bit_chk,
                           (long unsigned int)0,
                           (long unsigned int)(*bit_chk != 0x00));
                return *bit_chk != 0x00;
        }
}

static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group,
        uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
        uint32_t all_groups)
{
        return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct,
                                              bit_chk, all_groups, 1);
}

static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v)
{
        writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
        (*v)++;
}

static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v)
{
        uint32_t i;

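        /*
         * The PHY manager command interface above only exposes an
         * increment (inc_vfifo_hard_phy), so a decrement is implemented
         * as VFIFO_SIZE - 1 increments, which wraps the pointer around
         * to one position earlier.
         */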
        for (i = 0; i < VFIFO_SIZE - 1; i++)
                rw_mgr_incr_vfifo(grp, v);
}

static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk)
{
        uint32_t v;
        uint32_t fail_cnt = 0;
        uint32_t test_status;

        for (v = 0; v < VFIFO_SIZE; ) {
                debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n",
                           __func__, __LINE__, v);
                test_status = rw_mgr_mem_calibrate_read_test_all_ranks
                        (grp, 1, PASS_ONE_BIT, bit_chk, 0);
                if (!test_status) {
                        fail_cnt++;

                        if (fail_cnt == 2)
                                break;
                }

                /* Fiddle with FIFO. */
                rw_mgr_incr_vfifo(grp, &v);
        }

        if (v >= VFIFO_SIZE) {
                /* No failing read found! Something must have gone wrong. */
                debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n",
                           __func__, __LINE__);
                return 0;
        } else {
                return v;
        }
}

static int find_working_phase(uint32_t *grp, uint32_t *bit_chk,
                              uint32_t dtaps_per_ptap, uint32_t *work_bgn,
                              uint32_t *v, uint32_t *d, uint32_t *p,
                              uint32_t *i, uint32_t *max_working_cnt)
{
        uint32_t found_begin = 0;
        uint32_t tmp_delay = 0;
        uint32_t test_status;

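        /*
         * Sweep the DQS-enable window in three nested dimensions: for
         * each delay-chain tap (d), step through every VFIFO position
         * (i), and within it every phase tap (p), until a read passes.
         * work_bgn accumulates the total delay tried so far
         * (d * IO_DELAY_PER_DQS_EN_DCHAIN_TAP + p * IO_DELAY_PER_OPA_TAP).
         */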
1337         for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay +=
1338                 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1339                 *work_bgn = tmp_delay;
1340                 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1341
1342                 for (*i = 0; *i < VFIFO_SIZE; (*i)++) {
1343                         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn +=
1344                                 IO_DELAY_PER_OPA_TAP) {
1345                                 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1346
1347                                 test_status =
1348                                 rw_mgr_mem_calibrate_read_test_all_ranks
1349                                 (*grp, 1, PASS_ONE_BIT, bit_chk, 0);
1350
1351                                 if (test_status) {
1352                                         *max_working_cnt = 1;
1353                                         found_begin = 1;
1354                                         break;
1355                                 }
1356                         }
1357
1358                         if (found_begin)
1359                                 break;
1360
1361                         if (*p > IO_DQS_EN_PHASE_MAX)
1362                                 /* fiddle with FIFO */
1363                                 rw_mgr_incr_vfifo(*grp, v);
1364                 }
1365
1366                 if (found_begin)
1367                         break;
1368         }
1369
1370         if (*i >= VFIFO_SIZE) {
1371                 /* cannot find working solution */
1372                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n",
1373                            __func__, __LINE__);
1374                 return 0;
1375         } else {
1376                 return 1;
1377         }
1378 }
1379
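/*
 * The phase search above moves in coarse ptap steps, so the true left
 * edge of the working window may lie up to one ptap before the first
 * passing phase. Back up one phase tap (wrapping the VFIFO if the phase
 * was already 0) and re-scan in fine dtap steps to refine work_bgn.
 */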
1380 static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk,
1381                              uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1382                              uint32_t *p, uint32_t *max_working_cnt)
1383 {
1384         uint32_t found_begin = 0;
1385         uint32_t tmp_delay;
1386
1387         /* Special case code for backing up a phase */
1388         if (*p == 0) {
1389                 *p = IO_DQS_EN_PHASE_MAX;
1390                 rw_mgr_decr_vfifo(*grp, v);
1391         } else {
1392                 (*p)--;
1393         }
1394         tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP;
1395         scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1396
1397         for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn;
1398                 (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1399                 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1400
1401                 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1402                                                              PASS_ONE_BIT,
1403                                                              bit_chk, 0)) {
1404                         found_begin = 1;
1405                         *work_bgn = tmp_delay;
1406                         break;
1407                 }
1408         }
1409
1410         /* We have found a working dtap before the ptap found above */
1411         if (found_begin == 1)
1412                 (*max_working_cnt)++;
1413
1414         /*
1415          * Restore the VFIFO to the state it had before we
1416          * (possibly) decremented it.
1417          */
1418         (*p)++;
1419         if (*p > IO_DQS_EN_PHASE_MAX) {
1420                 *p = 0;
1421                 rw_mgr_incr_vfifo(*grp, v);
1422         }
1423
1424         scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0);
1425 }
1426
1427 static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk,
1428                              uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1429                              uint32_t *p, uint32_t *i, uint32_t *max_working_cnt,
1430                              uint32_t *work_end)
1431 {
1432         uint32_t found_end = 0;
1433
1434         (*p)++;
1435         *work_end += IO_DELAY_PER_OPA_TAP;
1436         if (*p > IO_DQS_EN_PHASE_MAX) {
1437                 /* fiddle with FIFO */
1438                 *p = 0;
1439                 rw_mgr_incr_vfifo(*grp, v);
1440         }
1441
1442         for (; *i < VFIFO_SIZE + 1; (*i)++) {
1443                 for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end
1444                         += IO_DELAY_PER_OPA_TAP) {
1445                         scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1446
1447                         if (!rw_mgr_mem_calibrate_read_test_all_ranks
1448                                 (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) {
1449                                 found_end = 1;
1450                                 break;
1451                         } else {
1452                                 (*max_working_cnt)++;
1453                         }
1454                 }
1455
1456                 if (found_end)
1457                         break;
1458
1459                 if (*p > IO_DQS_EN_PHASE_MAX) {
1460                         /* fiddle with FIFO */
1461                         rw_mgr_incr_vfifo(*grp, v);
1462                         *p = 0;
1463                 }
1464         }
1465
1466         if (*i >= VFIFO_SIZE + 1) {
1467                 /* cannot see edge of failing read */
1468                 debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end: failed\n",
1469                            __func__, __LINE__);
1470                 return 0;
1471         } else {
1472                 return 1;
1473         }
1474 }
1475
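/*
 * Centering sketch: work_mid is the midpoint of [work_bgn, work_end].
 * Because the DQS enable setting repeats every VFIFO cycle, work_mid is
 * first reduced modulo one full ptap sweep and then decomposed back
 * into phase taps (p) and delay-chain taps (d). As a purely
 * illustrative example with hypothetical tap values of
 * IO_DELAY_PER_OPA_TAP = 400 ps and IO_DELAY_PER_DQS_EN_DCHAIN_TAP =
 * 25 ps, a reduced work_mid of 1050 ps decomposes into p = 2 (800 ps)
 * plus d = 10 (250 ps).
 */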
1476 static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk,
1477                                   uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1478                                   uint32_t *p, uint32_t *work_mid,
1479                                   uint32_t *work_end)
1480 {
1481         int i;
1482         int tmp_delay = 0;
1483
1484         *work_mid = (*work_bgn + *work_end) / 2;
1485
1486         debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1487                    *work_bgn, *work_end, *work_mid);
1488         /* Get the middle delay to be less than a VFIFO delay */
1489         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX;
1490                 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1491                 ;
1492         debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
1493         while (*work_mid > tmp_delay)
1494                 *work_mid -= tmp_delay;
1495         debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid);
1496
1497         tmp_delay = 0;
1498         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid;
1499                 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1500                 ;
1501         tmp_delay -= IO_DELAY_PER_OPA_TAP;
1502         debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay);
1503         for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++,
1504                 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP)
1505                 ;
1506         debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay);
1507
1508         scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1);
1509         scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1510
1511         /*
1512          * Push vfifo until we can successfully calibrate. We can do this
1513          * because the largest possible margin is 1 VFIFO cycle.
1514          */
1515         for (i = 0; i < VFIFO_SIZE; i++) {
1516                 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n",
1517                            *v);
1518                 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1519                                                              PASS_ONE_BIT,
1520                                                              bit_chk, 0)) {
1521                         break;
1522                 }
1523
1524                 /* fiddle with FIFO */
1525                 rw_mgr_incr_vfifo(*grp, v);
1526         }
1527
1528         if (i >= VFIFO_SIZE) {
1529                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: failed\n",
1530                            __func__, __LINE__);
1531                 return 0;
1532         } else {
1533                 return 1;
1534         }
1535 }
1536
1537 /* find a good dqs enable to use */
1538 static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp)
1539 {
1540         uint32_t v, d, p, i;
1541         uint32_t max_working_cnt;
1542         uint32_t bit_chk;
1543         uint32_t dtaps_per_ptap;
1544         uint32_t work_bgn, work_mid, work_end;
1545         uint32_t found_passing_read, found_failing_read, initial_failing_dtap;
1546
1547         debug("%s:%d %u\n", __func__, __LINE__, grp);
1548
1549         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1550
1551         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1552         scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
1553
1554         /* ************************************************************** */
1555         /* * Step 0 : Determine number of delay taps for each phase tap * */
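        /*
         * Illustration with hypothetical tap values (not the actual
         * ones): IO_DELAY_PER_OPA_TAP = 400 ps and
         * IO_DELAY_PER_DQS_EN_DCHAIN_TAP = 25 ps would yield
         * dtaps_per_ptap = 400 / 25 = 16 delay taps per phase tap.
         */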
1556         dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1557
1558         /* ********************************************************* */
1559         /* * Step 1 : First push vfifo until we get a failing read * */
1560         v = find_vfifo_read(grp, &bit_chk);
1561
1562         max_working_cnt = 0;
1563
1564         /* ******************************************************** */
1565         /* * step 2: find first working phase, increment in ptaps * */
1566         work_bgn = 0;
1567         if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d,
1568                                 &p, &i, &max_working_cnt) == 0)
1569                 return 0;
1570
1571         work_end = work_bgn;
1572
1573         /*
1574          * If d is 0 then the working window covers a phase tap and
1575          * we can follow the old procedure of incrementing in ptaps;
1576          * otherwise we've found the beginning, and we need to increment
1577          * the dtaps until we find the end.
1578          */
1578         if (d == 0) {
1579                 /* ********************************************************* */
1580                 /* * step 3a: if we have room, back off by one and
1581                 increment in dtaps * */
1582
1583                 sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1584                                  &max_working_cnt);
1585
1586                 /* ********************************************************* */
1587                 /* * step 4a: go forward from working phase to non working
1588                 phase, increment in ptaps * */
1589                 if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1590                                          &i, &max_working_cnt, &work_end) == 0)
1591                         return 0;
1592
1593                 /* ********************************************************* */
1594                 /* * step 5a:  back off one from last, increment in dtaps  * */
1595
1596                 /* Special case code for backing up a phase */
1597                 if (p == 0) {
1598                         p = IO_DQS_EN_PHASE_MAX;
1599                         rw_mgr_decr_vfifo(grp, &v);
1600                 } else {
1601                         p = p - 1;
1602                 }
1603
1604                 work_end -= IO_DELAY_PER_OPA_TAP;
1605                 scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1606
1607                 /* * The actual increment of dtaps is done outside of
1608                 the if/else loop to share code */
1609                 d = 0;
1610
1611                 debug_cond(DLEVEL == 2,
1612                            "%s:%d find_dqs_en_phase: v/p: vfifo=%u ptap=%u\n",
1613                            __func__, __LINE__, v, p);
1614         } else {
1615                 /* ******************************************************* */
1616                 /* * step 3-5b:  Find the right edge of the window using
1617                 delay taps   * */
1618                 debug_cond(DLEVEL == 2,
1619                            "%s:%d find_dqs_en_phase: vfifo=%u ptap=%u dtap=%u bgn=%u\n",
1620                            __func__, __LINE__, v, p, d, work_bgn);
1621
1622                 work_end = work_bgn;
1623
1624                 /* * The actual increment of dtaps is done outside of the
1625                 if/else loop to share code */
1626
1627                 /* Only here to counterbalance a subtract later on which is
1628                 not needed if this branch of the algorithm is taken */
1629                 max_working_cnt++;
1630         }
1631
1632         /* The dtap increment to find the failing edge is done here */
1633         for (; d <= IO_DQS_EN_DELAY_MAX;
1634              d++, work_end += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1635                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: dtap=%u\n",
1636                            __func__, __LINE__, d);
1637                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1638
1639                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1640                                                               PASS_ONE_BIT,
1641                                                               &bit_chk, 0)) {
1642                         break;
1643                 }
1644         }
1645
1646         /* Go back to working dtap */
1647         if (d != 0)
1648                 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1649
1650         debug_cond(DLEVEL == 2,
1651                    "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u ptap=%u dtap=%u end=%u\n",
1652                    __func__, __LINE__, v, p, d - 1, work_end);
1653
1654         if (work_end < work_bgn) {
1655                 /* nil range */
1656                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: failed\n",
1657                            __func__, __LINE__);
1658                 return 0;
1659         }
1660
1661         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n",
1662                    __func__, __LINE__, work_bgn, work_end);
1663
1664         /* *************************************************************** */
1665         /*
1666          * * We need to calculate the number of dtaps that equal a ptap
1667          * * To do that we'll back up a ptap and re-find the edge of the
1668          * * window using dtaps
1669          */
1670
1671         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap for tracking\n",
1672                    __func__, __LINE__);
1673
1674         /* Special case code for backing up a phase */
1675         if (p == 0) {
1676                 p = IO_DQS_EN_PHASE_MAX;
1677                 rw_mgr_decr_vfifo(grp, &v);
1678                 debug_cond(DLEVEL == 2,
1679                            "%s:%d find_dqs_en_phase: backedup cycle/phase: v=%u p=%u\n",
1680                            __func__, __LINE__, v, p);
1681         } else {
1682                 p = p - 1;
1683                 debug_cond(DLEVEL == 2,
1684                            "%s:%d find_dqs_en_phase: backedup phase only: v=%u p=%u\n",
1685                            __func__, __LINE__, v, p);
1686         }
1687
1688         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1689
1690         /*
1691          * Increase dtap until we first see a passing read (in case the
1692          * window is smaller than a ptap),
1693          * and then a failing read to mark the edge of the window again
1694          */
1695
1696         /* Find a passing read */
1697         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n",
1698                    __func__, __LINE__);
1699         found_passing_read = 0;
1700         found_failing_read = 0;
1701         initial_failing_dtap = d;
1702         for (; d <= IO_DQS_EN_DELAY_MAX; d++) {
1703                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing read d=%u\n",
1704                            __func__, __LINE__, d);
1705                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1706
1707                 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1708                                                              PASS_ONE_BIT,
1709                                                              &bit_chk, 0)) {
1710                         found_passing_read = 1;
1711                         break;
1712                 }
1713         }
1714
1715         if (found_passing_read) {
1716                 /* Find a failing read */
1717                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing read\n",
1718                            __func__, __LINE__);
1719                 for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) {
1720                         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing read d=%u\n",
1721                                    __func__, __LINE__, d);
1722                         scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1723
1724                         if (!rw_mgr_mem_calibrate_read_test_all_ranks
1725                                 (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) {
1726                                 found_failing_read = 1;
1727                                 break;
1728                         }
1729                 }
1730         } else {
1731                 debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to calculate dtaps ",
1732                            __func__, __LINE__);
1733                 debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n");
1734         }
1735
1736         /*
1737          * The dynamically calculated dtaps_per_ptap is only valid if we
1738          * found both a passing and a failing read. If we didn't, it means
1739          * d hit the max (IO_DQS_EN_DELAY_MAX), and dtaps_per_ptap retains
1740          * its statically calculated value.
1741          */
1742         if (found_passing_read && found_failing_read)
1743                 dtaps_per_ptap = d - initial_failing_dtap;
1744
1745         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
1746         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u - %u = %u\n",
1747                    __func__, __LINE__, d, initial_failing_dtap,
1748                    dtaps_per_ptap);
1749
1750         /* ******************************************** */
1751         /* * step 6:  Find the centre of the window   * */
1752         if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1753                                    &work_mid, &work_end) == 0)
1754                 return 0;
1755
1756         debug_cond(DLEVEL == 2,
1757                    "%s:%d find_dqs_en_phase: center found: vfifo=%u ptap=%u dtap=%u\n",
1758                    __func__, __LINE__, v, p - 1, d);
1759         return 1;
1760 }
1761
1762 /*
1763  * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
1764  * dq_in_delay values
1765  */
1766 static uint32_t
1767 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
1768 (uint32_t write_group, uint32_t read_group, uint32_t test_bgn)
1769 {
1770         uint32_t found;
1771         uint32_t i;
1772         uint32_t p;
1773         uint32_t d;
1774         uint32_t r;
1775
1776         const uint32_t delay_step = IO_IO_IN_DELAY_MAX /
1777                 (RW_MGR_MEM_DQ_PER_READ_DQS - 1);
1778                 /* we start at zero, so have one less dq to divide among */
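        /*
         * Example with hypothetical widths: IO_IO_IN_DELAY_MAX = 31 and
         * RW_MGR_MEM_DQ_PER_READ_DQS = 8 give delay_step = 31 / 7 = 4,
         * so the loop below programs in-delays of 0, 4, 8, ..., 28
         * across the eight DQ pins of the group.
         */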
1779
1780         debug("%s:%d (%u,%u,%u)\n", __func__, __LINE__, write_group,
1781               read_group, test_bgn);
1782
1783         /* try different dq_in_delays since the dq path is shorter than dqs */
1784
1785         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1786              r += NUM_RANKS_PER_SHADOW_REG) {
1787                 for (i = 0, p = test_bgn, d = 0;
1788                      i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++, d += delay_step) {
1789                         debug_cond(DLEVEL == 1,
1790                                    "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay: "
1791                                    "g=%u/%u r=%u, i=%u p=%u d=%u\n", __func__,
1792                                    __LINE__, write_group, read_group, r, i, p, d);
1793                         scc_mgr_set_dq_in_delay(p, d);
1794                         scc_mgr_load_dq(p);
1795                 }
1796                 writel(0, &sdr_scc_mgr->update);
1797         }
1798
1799         found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group);
1800
1801         debug_cond(DLEVEL == 1,
1802                    "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay: "
1803                    "g=%u/%u found=%u; Resetting delay chain to zero\n",
1804                    __func__, __LINE__, write_group, read_group, found);
1805
1806         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1807              r += NUM_RANKS_PER_SHADOW_REG) {
1808                 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS;
1809                         i++, p++) {
1810                         scc_mgr_set_dq_in_delay(p, 0);
1811                         scc_mgr_load_dq(p);
1812                 }
1813                 writel(0, &sdr_scc_mgr->update);
1814         }
1815
1816         return found;
1817 }
1818
1819 /* per-bit deskew DQ and center */
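/*
 * Edge encoding used below: left_edge[i] and right_edge[i] start out at
 * the illegal value IO_IO_IN_DELAY_MAX + 1. A passing read at delay d
 * records d as that bit's edge. A negative value -(d + 1) records that
 * the opposite edge was inferred from a failure at delay d before any
 * pass was seen, i.e. the edge lies on the other side of the zero-delay
 * point. Any bit still holding the illegal value after both sweeps has
 * no usable window, and the group fails calibration.
 */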
1820 static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn,
1821         uint32_t write_group, uint32_t read_group, uint32_t test_bgn,
1822         uint32_t use_read_test, uint32_t update_fom)
1823 {
1824         uint32_t i, p, d, min_index;
1825         uint32_t bit_chk;
1826         uint32_t sticky_bit_chk;
1827         /*
1828          * Store the left/right edges and final DQ delays as signed, since
1829          * they are compared against signed numbers (and may go negative).
1830          */
1831         int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1832         int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1833         int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS];
1834         int32_t mid;
1835         int32_t orig_mid_min, mid_min;
1836         int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs,
1837                 final_dqs_en;
1838         int32_t dq_margin, dqs_margin;
1839         uint32_t stop;
1840         uint32_t temp_dq_in_delay1, temp_dq_in_delay2;
1841         uint32_t addr;
1842
1843         debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn);
1844
1845         addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET;
1846         start_dqs = readl(addr + (read_group << 2));
1847         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
1848                 start_dqs_en = readl(addr + ((read_group << 2)
1849                                      - IO_DQS_EN_DELAY_OFFSET));
1850
1851         /* set the left and right edge of each bit to an illegal value */
1852         /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */
1853         sticky_bit_chk = 0;
1854         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1855                 left_edge[i]  = IO_IO_IN_DELAY_MAX + 1;
1856                 right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1857         }
1858
1859         /* Search for the left edge of the window for each bit */
1860         for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) {
1861                 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d);
1862
1863                 writel(0, &sdr_scc_mgr->update);
1864
1865                 /*
1866                  * Stop searching when the read test doesn't pass AND when
1867                  * we've seen a passing read on every bit.
1868                  */
1869                 if (use_read_test) {
1870                         stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1871                                 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1872                                 &bit_chk, 0, 0);
1873                 } else {
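                        /*
                         * No read test is available here: fall back on
                         * the write test and extract this read group's
                         * bits from the write-group-wide bit_chk. The
                         * shift below assumes each write group spans a
                         * whole number of read groups (the READ/WRITE
                         * DQS width ratio).
                         */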
1874                         rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1875                                                         0, PASS_ONE_BIT,
1876                                                         &bit_chk, 0);
1877                         bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1878                                 (read_group - (write_group *
1879                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
1880                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1881                         stop = (bit_chk == 0);
1882                 }
1883                 sticky_bit_chk = sticky_bit_chk | bit_chk;
1884                 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1885                 debug_cond(DLEVEL == 2,
1886                            "%s:%d vfifo_center(left): dtap=%u => %u == %u && %u\n",
1887                            __func__, __LINE__, d, sticky_bit_chk,
1888                            param->read_correct_mask, stop);
1889
1890                 if (stop == 1) {
1891                         break;
1892                 } else {
1893                         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1894                                 if (bit_chk & 1) {
1895                                         /* Remember a passing test as the
1896                                         left_edge */
1897                                         left_edge[i] = d;
1898                                 } else {
1899                                         /* If a left edge has not been seen yet,
1900                                         then a future passing test will mark
1901                                         this edge as the right edge */
1902                                         if (left_edge[i] ==
1903                                                 IO_IO_IN_DELAY_MAX + 1) {
1904                                                 right_edge[i] = -(d + 1);
1905                                         }
1906                                 }
1907                                 bit_chk = bit_chk >> 1;
1908                         }
1909                 }
1910         }
1911
1912         /* Reset DQ delay chains to 0 */
1913         scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, 0);
1914         sticky_bit_chk = 0;
1915         for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) {
1916                 debug_cond(DLEVEL == 2,
1917                            "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
1918                            __func__, __LINE__, i, left_edge[i], i, right_edge[i]);
1919
1920                 /*
1921                  * Check for cases where we haven't found the left edge,
1922                  * which makes our assignment of the right edge invalid.
1923                  * Reset it to the illegal value.
1924                  */
1925                 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && (
1926                         right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1927                         right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1928                         debug_cond(DLEVEL == 2,
1929                                    "%s:%d vfifo_center: reset right_edge[%u]: %d\n",
1930                                    __func__, __LINE__, i, right_edge[i]);
1931                 }
1932
1933                 /*
1934                  * Reset sticky bit (except for bits where we have seen
1935                  * both the left and right edge).
1936                  */
1937                 sticky_bit_chk = sticky_bit_chk << 1;
1938                 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) &&
1939                     (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1940                         sticky_bit_chk = sticky_bit_chk | 1;
1941                 }
1942
1943                 if (i == 0)
1944                         break;
1945         }
1946
1947         /* Search for the right edge of the window for each bit */
1948         for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) {
1949                 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
1950                 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
1951                         uint32_t delay = d + start_dqs_en;
1952                         if (delay > IO_DQS_EN_DELAY_MAX)
1953                                 delay = IO_DQS_EN_DELAY_MAX;
1954                         scc_mgr_set_dqs_en_delay(read_group, delay);
1955                 }
1956                 scc_mgr_load_dqs(read_group);
1957
1958                 writel(0, &sdr_scc_mgr->update);
1959
1960                 /*
1961                  * Stop searching when the read test doesn't pass AND when
1962                  * we've seen a passing read on every bit.
1963                  */
1964                 if (use_read_test) {
1965                         stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1966                                 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1967                                 &bit_chk, 0, 0);
1968                 } else {
1969                         rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1970                                                         0, PASS_ONE_BIT,
1971                                                         &bit_chk, 0);
1972                         bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1973                                 (read_group - (write_group *
1974                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
1975                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1976                         stop = (bit_chk == 0);
1977                 }
1978                 sticky_bit_chk = sticky_bit_chk | bit_chk;
1979                 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1980
1981                 debug_cond(DLEVEL == 2,
1982                            "%s:%d vfifo_center(right): dtap=%u => %u == %u && %u\n",
1983                            __func__, __LINE__, d, sticky_bit_chk, param->read_correct_mask, stop);
1984
1985                 if (stop == 1) {
1986                         break;
1987                 } else {
1988                         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1989                                 if (bit_chk & 1) {
1990                                         /* Remember a passing test as
1991                                         the right_edge */
1992                                         right_edge[i] = d;
1993                                 } else {
1994                                         if (d != 0) {
1995                                                 /* If a right edge has not been
1996                                                 seen yet, then a future passing
1997                                                 test will mark this edge as the
1998                                                 left edge */
1999                                                 if (right_edge[i] ==
2000                                                 IO_IO_IN_DELAY_MAX + 1) {
2001                                                         left_edge[i] = -(d + 1);
2002                                                 }
2003                                         } else {
2004                                                 /* d = 0 failed, but it passed
2005                                                 when testing the left edge,
2006                                                 so it must be marginal,
2007                                                 set it to -1 */
2008                                                 if (right_edge[i] ==
2009                                                         IO_IO_IN_DELAY_MAX + 1 &&
2010                                                         left_edge[i] !=
2011                                                         IO_IO_IN_DELAY_MAX
2012                                                         + 1) {
2013                                                         right_edge[i] = -1;
2014                                                 }
2015                                                 /* If a right edge has not been
2016                                                 seen yet, then a future passing
2017                                                 test will mark this edge as the
2018                                                 left edge */
2019                                                 else if (right_edge[i] ==
2020                                                         IO_IO_IN_DELAY_MAX +
2021                                                         1) {
2022                                                         left_edge[i] = -(d + 1);
2023                                                 }
2024                                         }
2025                                 }
2026
2027                                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,d=%u]: ",
2028                                            __func__, __LINE__, d);
2029                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ",
2030                                            (int)(bit_chk & 1), i, left_edge[i]);
2031                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2032                                            right_edge[i]);
2033                                 bit_chk = bit_chk >> 1;
2034                         }
2035                 }
2036         }
2037
2038         /* Check that all bits have a window */
2039         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2040                 debug_cond(DLEVEL == 2,
2041                            "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2042                            __func__, __LINE__, i, left_edge[i], i, right_edge[i]);
2043                 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i]
2044                         == IO_IO_IN_DELAY_MAX + 1)) {
2045                         /*
2046                          * Restore delay chain settings before letting the loop
2047                          * in rw_mgr_mem_calibrate_vfifo to retry different
2048                          * dqs/ck relationships.
2049                          */
2050                         scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs);
2051                         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2052                                 scc_mgr_set_dqs_en_delay(read_group,
2053                                                          start_dqs_en);
2054                         }
2055                         scc_mgr_load_dqs(read_group);
2056                         writel(0, &sdr_scc_mgr->update);
2057
2058                         debug_cond(DLEVEL == 1,
2059                                    "%s:%d vfifo_center: failed to find edge [%u]: %d %d\n",
2060                                    __func__, __LINE__, i, left_edge[i], right_edge[i]);
2061                         if (use_read_test) {
2062                                 set_failing_group_stage(read_group *
2063                                         RW_MGR_MEM_DQ_PER_READ_DQS + i,
2064                                         CAL_STAGE_VFIFO,
2065                                         CAL_SUBSTAGE_VFIFO_CENTER);
2066                         } else {
2067                                 set_failing_group_stage(read_group *
2068                                         RW_MGR_MEM_DQ_PER_READ_DQS + i,
2069                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2070                                         CAL_SUBSTAGE_VFIFO_CENTER);
2071                         }
2072                         return 0;
2073                 }
2074         }
2075
2076         /* Find middle of window for each DQ bit */
2077         mid_min = left_edge[0] - right_edge[0];
2078         min_index = 0;
2079         for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2080                 mid = left_edge[i] - right_edge[i];
2081                 if (mid < mid_min) {
2082                         mid_min = mid;
2083                         min_index = i;
2084                 }
2085         }
2086
2087         /*
2088          * -mid_min/2 represents the amount that we need to move DQS.
2089          * If mid_min is odd and positive we'll need to add one to
2090          * make sure the rounding in further calculations is correct
2091          * (always bias to the right), so just add 1 for all positive values.
2092          */
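        /*
         * Worked example of the bias: mid_min = 5 becomes (5 + 1) / 2 = 3,
         * whereas C's truncating division alone would give 5 / 2 = 2;
         * negative values, e.g. mid_min = -5, are left as -5 / 2 = -2.
         */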
2093         if (mid_min > 0)
2094                 mid_min++;
2095
2096         mid_min = mid_min / 2;
2097
2098         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n",
2099                    __func__, __LINE__, mid_min, min_index);
2100
2101         /* Determine the amount we can change DQS (which is -mid_min) */
2102         orig_mid_min = mid_min;
2103         new_dqs = start_dqs - mid_min;
2104         if (new_dqs > IO_DQS_IN_DELAY_MAX)
2105                 new_dqs = IO_DQS_IN_DELAY_MAX;
2106         else if (new_dqs < 0)
2107                 new_dqs = 0;
2108
2109         mid_min = start_dqs - new_dqs;
2110         debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2111                    mid_min, new_dqs);
2112
2113         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2114                 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX)
2115                         mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX;
2116                 else if (start_dqs_en - mid_min < 0)
2117                         mid_min += start_dqs_en - mid_min;
2118         }
2119         new_dqs = start_dqs - mid_min;
2120
2121         debug_cond(DLEVEL == 1,
2122                    "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n",
2123                    start_dqs, IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1,
2124                    new_dqs, mid_min);
2125
2126         /* Initialize data for export structures */
2127         dqs_margin = IO_IO_IN_DELAY_MAX + 1;
2128         dq_margin  = IO_IO_IN_DELAY_MAX + 1;
2129
2130         /* add delay to bring centre of all DQ windows to the same "level" */
2131         for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
2132                 /* Use values before divide by 2 to reduce round off error */
2133                 shift_dq = (left_edge[i] - right_edge[i] -
2134                         (left_edge[min_index] - right_edge[min_index]))/2  +
2135                         (orig_mid_min - mid_min);
2136
2137                 debug_cond(DLEVEL == 2, "vfifo_center: before: shift_dq[%u]=%d\n",
2138                            i, shift_dq);
2139
2140                 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET;
2141                 temp_dq_in_delay1 = readl(addr + (p << 2));
2142                 temp_dq_in_delay2 = readl(addr + (i << 2));
2143
2144                 if (shift_dq + (int32_t)temp_dq_in_delay1 >
2145                         (int32_t)IO_IO_IN_DELAY_MAX) {
2146                         shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay1;
2147                 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) {
2148                         shift_dq = -(int32_t)temp_dq_in_delay1;
2149                 }
2150                 debug_cond(DLEVEL == 2, "vfifo_center: after: shift_dq[%u]=%d\n",
2151                            i, shift_dq);
2152                 final_dq[i] = temp_dq_in_delay1 + shift_dq;
2153                 scc_mgr_set_dq_in_delay(p, final_dq[i]);
2154                 scc_mgr_load_dq(p);
2155
2156                 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i,
2157                            left_edge[i] - shift_dq + (-mid_min),
2158                            right_edge[i] + shift_dq - (-mid_min));
2159                 /* To determine values for export structures */
2160                 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2161                         dq_margin = left_edge[i] - shift_dq + (-mid_min);
2162
2163                 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2164                         dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2165         }
2166
2167         final_dqs = new_dqs;
2168         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
2169                 final_dqs_en = start_dqs_en - mid_min;
2170
2171         /* Move DQS-en */
2172         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2173                 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en);
2174                 scc_mgr_load_dqs(read_group);
2175         }
2176
2177         /* Move DQS */
2178         scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs);
2179         scc_mgr_load_dqs(read_group);
2180         debug_cond(DLEVEL == 2,
2181                    "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d\n",
2182                    __func__, __LINE__, dq_margin, dqs_margin);
2183
2184         /*
2185          * Do not remove this line as it makes sure all of our decisions
2186          * have been applied. Apply the update bit.
2187          */
2188         writel(0, &sdr_scc_mgr->update);
2189
2190         return (dq_margin >= 0) && (dqs_margin >= 0);
2191 }
2192
2193 /*
2194  * calibrate the read valid prediction FIFO.
2195  *
2196  *  - read valid prediction will consist of finding a good DQS enable phase,
2197  * DQS enable delay, DQS input phase, and DQS input delay.
2198  *  - we also do a per-bit deskew on the DQ lines.
2199  */
2200 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group,
2201                                            uint32_t test_bgn)
2202 {
2203         uint32_t p, d, rank_bgn, sr;
2204         uint32_t dtaps_per_ptap;
2205         uint32_t tmp_delay;
2206         uint32_t bit_chk;
2207         uint32_t grp_calibrated;
2208         uint32_t write_group, write_test_bgn;
2209         uint32_t failed_substage;
2210
2211         debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn);
2212
2213         /* update info for sims */
2214         reg_file_set_stage(CAL_STAGE_VFIFO);
2215
2216         write_group = read_group;
2217         write_test_bgn = test_bgn;
2218
2219         /* USER Determine number of delay taps for each phase tap */
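        /*
         * Counting sketch: the loop below steps tmp_delay in dchain-tap
         * increments until it reaches one phase tap. With hypothetical
         * values IO_DELAY_PER_OPA_TAP = 416 ps and
         * IO_DELAY_PER_DQS_EN_DCHAIN_TAP = 25 ps, the loop body runs 17
         * times (17 * 25 = 425 >= 416) and the decrement afterwards
         * leaves dtaps_per_ptap = 16.
         */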
2220         dtaps_per_ptap = 0;
2221         tmp_delay = 0;
2222         while (tmp_delay < IO_DELAY_PER_OPA_TAP) {
2223                 dtaps_per_ptap++;
2224                 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
2225         }
2226         dtaps_per_ptap--;
2227         tmp_delay = 0;
2228
2229         /* update info for sims */
2230         reg_file_set_group(read_group);
2231
2232         grp_calibrated = 0;
2233
2234         reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2235         failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2236
2237         for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) {
2238                 /*
2239                  * In RLDRAMX we may be messing with the delay of pins in
2240                  * the same write group but outside of the current read
2241                  * group, but that's ok because we haven't calibrated the
2242                  * output side yet.
2243                  */
2244                 if (d > 0) {
2245                         scc_mgr_apply_group_all_out_delay_add_all_ranks
2246                         (write_group, write_test_bgn, d);
2247                 }
2248
2249                 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0;
2250                         p++) {
2251                         /* set a particular dqdqs phase */
2252                         scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p);
2253
2254                         debug_cond(DLEVEL == 1,
2255                                    "%s:%d calibrate_vfifo: g=%u p=%u d=%u\n",
2256                                    __func__, __LINE__, read_group, p, d);
2257
2258                         /*
2259                          * Load up the patterns used by read calibration
2260                          * using current DQDQS phase.
2261                          */
2262                         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2263                         if (!(gbl->phy_debug_mode_flags &
2264                                 PHY_DEBUG_DISABLE_GUARANTEED_READ)) {
2265                                 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks
2266                                     (read_group, 1, &bit_chk)) {
2267                                         debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:",
2268                                                    __func__, __LINE__);
2269                                         debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n",
2270                                                    read_group, p, d);
2271                                         break;
2272                                 }
2273                         }
2274
2275                         /* case:56390 */
2276                         grp_calibrated = 1;
2277                         if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
2278                             (write_group, read_group, test_bgn)) {
2279                                 /*
2280                                  * USER Read per-bit deskew can be done on a
2281                                  * per shadow register basis.
2282                                  */
2283                                 for (rank_bgn = 0, sr = 0;
2284                                      rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2285                                      rank_bgn += NUM_RANKS_PER_SHADOW_REG,
2286                                      ++sr) {
2287                                         /*
2288                                          * Determine if this set of ranks
2289                                          * should be skipped entirely.
2290                                          */
2291                                         if (param->skip_shadow_regs[sr])
2292                                                 continue;
2293                                         /*
2294                                          * If doing read after write
2295                                          * calibration, do not update
2296                                          * FOM now - do it then.
2297                                          */
2298                                         if (!rw_mgr_mem_calibrate_vfifo_center
2299                                             (rank_bgn, write_group,
2300                                              read_group, test_bgn, 1, 0)) {
2301                                                 grp_calibrated = 0;
2302                                                 failed_substage =
2303                                                         CAL_SUBSTAGE_VFIFO_CENTER;
2304                                         }
2305                                 }
2306                         } else {
2307                                 grp_calibrated = 0;
2308                                 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2309                         }
2310                 }
2311         }
2312
2313         if (grp_calibrated == 0) {
2314                 set_failing_group_stage(write_group, CAL_STAGE_VFIFO,
2315                                         failed_substage);
2316                 return 0;
2317         }
2318
2319         /*
2320          * Reset the delay chains back to zero if they have moved > 1
2321          * (check for > 2 because the loop adds 2 to d even when the
2322          * first iteration passes).
2323          */
2324         if (d > 2)
2325                 scc_mgr_zero_group(write_group, 1);
2326
2327         return 1;
2328 }
2329
2330 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */
2331 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group,
2332                                                uint32_t test_bgn)
2333 {
2334         uint32_t rank_bgn, sr;
2335         uint32_t grp_calibrated;
2336         uint32_t write_group;
2337
2338         debug("%s:%d %u %u\n", __func__, __LINE__, read_group, test_bgn);
2339
2340         /* update info for sims */
2341
2342         reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2343         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2344
2345         write_group = read_group;
2346
2347         /* update info for sims */
2348         reg_file_set_group(read_group);
2349
2350         grp_calibrated = 1;
2351         /* Read per-bit deskew can be done on a per shadow register basis */
2352         for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2353                 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) {
2354                 /* Determine if this set of ranks should be skipped entirely */
2355                 if (!param->skip_shadow_regs[sr]) {
2356                 /* This is the last calibration round, update FOM here */
2357                         if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn,
2358                                                                 write_group,
2359                                                                 read_group,
2360                                                                 test_bgn, 0,
2361                                                                 1)) {
2362                                 grp_calibrated = 0;
2363                         }
2364                 }
2365         }
2366
2367
2368         if (grp_calibrated == 0) {
2369                 set_failing_group_stage(write_group,
2370                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2371                                         CAL_SUBSTAGE_VFIFO_CENTER);
2372                 return 0;
2373         }
2374
2375         return 1;
2376 }
2377
2378 /* Calibrate LFIFO to find smallest read latency */
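/*
 * Sketch of the loop below: the read latency is decremented one cycle at
 * a time while the read test keeps passing. Once a test fails (or the
 * latency reaches 0), a fudge factor of 2 is added to the last latency
 * tried and the result is programmed back into the PHY.
 */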
2379 static uint32_t rw_mgr_mem_calibrate_lfifo(void)
2380 {
2381         uint32_t found_one;
2382         uint32_t bit_chk;
2383
2384         debug("%s:%d\n", __func__, __LINE__);
2385
2386         /* update info for sims */
2387         reg_file_set_stage(CAL_STAGE_LFIFO);
2388         reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2389
2390         /* Load up the patterns used by read calibration for all ranks */
2391         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2392         found_one = 0;
2393
2394         do {
2395                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2396                 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u\n",
2397                            __func__, __LINE__, gbl->curr_read_lat);
2398
2399                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
2400                                                               NUM_READ_TESTS,
2401                                                               PASS_ALL_BITS,
2402                                                               &bit_chk, 1)) {
2403                         break;
2404                 }
2405
2406                 found_one = 1;
2407                 /* reduce read latency and see if things are
2408                  * working correctly */
2409                 gbl->curr_read_lat--;
2410         } while (gbl->curr_read_lat > 0);
2411
2412         /* reset the fifos to get pointers to known state */
2413
2414         writel(0, &phy_mgr_cmd->fifo_reset);
2415
2416         if (found_one) {
2417                 /* add a fudge factor to the read latency that was determined */
2418                 gbl->curr_read_lat += 2;
2419                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2420                 debug_cond(DLEVEL == 2,
2421                            "%s:%d lfifo: success: using read_lat=%u\n",
2422                            __func__, __LINE__, gbl->curr_read_lat);
2423                 return 1;
2424         } else {
2425                 set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
2426                                         CAL_SUBSTAGE_READ_LATENCY);
2427
2428                 debug_cond(DLEVEL == 2,
2429                            "%s:%d lfifo: failed at initial read_lat=%u\n",
2430                            __func__, __LINE__, gbl->curr_read_lat);
2431                 return 0;
2432         }
2433 }
2434
2435 /*
2436  * issue write test command.
2437  * two variants are provided: one that just tests a write pattern and
2438  * another that tests datamask functionality.
2439  */
2440 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group,
2441                                                   uint32_t test_dm)
2442 {
2443         uint32_t mcc_instruction;
2444         uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) &&
2445                 ENABLE_SUPER_QUICK_CALIBRATION);
2446         uint32_t rw_wl_nop_cycles;
2447         uint32_t addr;
2448
2449         /*
2450          * Set counter and jump addresses for the right
2451          * number of NOP cycles.
2452          * The number of supported NOP cycles can range from -1 to infinity
2453          * Three different cases are handled:
2454          *
2455          * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
2456          *    mechanism will be used to insert the right number of NOPs
2457          *
2458          * 2. For a number of NOP cycles equal to 0, the micro-instruction
2459          *    issuing the write command will jump straight to the
2460          *    micro-instruction that turns on DQS (for DDRx), or outputs write
2461          *    data (for RLD), skipping the NOP micro-instruction
2462          *    altogether
2463          *
2464          * 3. A number of NOP cycles equal to -1 indicates that DQS must be
2465          *    turned on in the same micro-instruction that issues the write
2466          *    command. Then we need
2467          *    to directly jump to the micro-instruction that sends out the data
2468          *
2469          * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
2470          *       (2 and 3). One jump-counter (0) is used to perform multiple
2471          *       write-read operations.
2472          *       one counter left to issue this command in "multiple-group" mode
2473          */
2474
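        /*
         * Summary of the counter programming below, derived from the
         * three cases above:
         *
         *   rw_wl_nop_cycles == -1: CNTR2 = 0xFF, jump2 = DATA,
         *                           jump3 = NOP, use the _WL_1 variant
         *                           of the instruction;
         *   rw_wl_nop_cycles ==  0: CNTR2 = 0xFF, jump2 = DQS;
         *   rw_wl_nop_cycles  >  0: CNTR2 = 0, CNTR3 = nop_cycles - 1,
         *                           jump3 = NOP.
         */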
2475         rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;
2476
2477         if (rw_wl_nop_cycles == -1) {
2478                 /*
2479                  * CNTR 2 - We want to execute the special write operation that
2480                  * turns on DQS right away and then skip directly to the
2481                  * instruction that sends out the data. We set the counter to a
2482                  * large number so that the jump is always taken.
2483                  */
2484                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2485
2486                 /* CNTR 3 - Not used */
2487                 if (test_dm) {
2488                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1;
2489                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA,
2490                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2491                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2492                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2493                 } else {
2494                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1;
2495                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA,
2496                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2497                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2498                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2499                 }
2500         } else if (rw_wl_nop_cycles == 0) {
2501                 /*
2502                  * CNTR 2 - We want to skip the NOP operation and go straight
2503                  * to the DQS enable instruction. We set the counter to a large
2504                  * number so that the jump is always taken.
2505                  */
2506                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2507
2508                 /* CNTR 3 - Not used */
2509                 if (test_dm) {
2510                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2511                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS,
2512                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2513                 } else {
2514                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2515                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS,
2516                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2517                 }
2518         } else {
2519                 /*
2520                  * CNTR 2 - In this case we want to execute the next instruction
2521                  * and NOT take the jump. So we set the counter to 0. The jump
2522                  * address doesn't count.
2523                  */
2524                 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
2525                 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2526
2527                 /*
2528                  * CNTR 3 - Set the nop counter to the number of cycles we
2529                  * need to loop for, minus 1.
2530                  */
2531                 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
2532                 if (test_dm) {
2533                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2534                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2535                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2536                 } else {
2537                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2538                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2539                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2540                 }
2541         }
2542
2543         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
2544                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
2545
2546         if (quick_write_mode)
2547                 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
2548         else
2549                 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);
2550
2551         writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
2552
2553         /*
2554          * CNTR 1 - This is used to ensure enough time elapses
2555          * for read data to come back.
2556          */
2557         writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);
2558
2559         if (test_dm) {
2560                 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT,
2561                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
2562         } else {
2563                 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT,
2564                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
2565         }
2566
2567         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
2568         writel(mcc_instruction, addr + (group << 2));
2569 }
2570
2571 /* Test writes, can check for a single bit pass or multiple bit pass */
2572 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
2573         uint32_t write_group, uint32_t use_dm, uint32_t all_correct,
2574         uint32_t *bit_chk, uint32_t all_ranks)
2575 {
2576         uint32_t r;
2577         uint32_t correct_mask_vg;
2578         uint32_t tmp_bit_chk;
2579         uint32_t vg;
2580         uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
2581                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
2582         uint32_t addr_rw_mgr;
2583         uint32_t base_rw_mgr;
2584
2585         *bit_chk = param->write_correct_mask;
2586         correct_mask_vg = param->write_correct_mask_vg;
2587
2588         for (r = rank_bgn; r < rank_end; r++) {
2589                 if (param->skip_ranks[r]) {
2590                         /* request to skip the rank */
2591                         continue;
2592                 }
2593
2594                 /* set rank */
2595                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
2596
2597                 tmp_bit_chk = 0;
2598                 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS;
2599                 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS - 1;; vg--) {
2600                         /* reset the fifos to get pointers to known state */
2601                         writel(0, &phy_mgr_cmd->fifo_reset);
2602
2603                         tmp_bit_chk = tmp_bit_chk <<
2604                                 (RW_MGR_MEM_DQ_PER_WRITE_DQS /
2605                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
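                        /*
                         * For example, assuming 32 DQ bits per write DQS and
                         * 4 virtual groups, each pass shifts tmp_bit_chk left
                         * by 8 and the OR below adds the 8 pass bits of
                         * virtual group vg.
                         */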
2606                         rw_mgr_mem_calibrate_write_test_issue(write_group *
2607                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS + vg,
2608                                 use_dm);
2609
2610                         base_rw_mgr = readl(addr_rw_mgr);
2611                         tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));
2612                         if (vg == 0)
2613                                 break;
2614                 }
2615                 *bit_chk &= tmp_bit_chk;
2616         }
2617
2618         if (all_correct) {
2619                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
2620                 debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == "
2621                            "%u => %lu", write_group, use_dm,
2622                            *bit_chk, param->write_correct_mask,
2623                            (long unsigned int)(*bit_chk ==
2624                            param->write_correct_mask));
2625                 return *bit_chk == param->write_correct_mask;
2626         } else {
2627                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
2628                 debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ",
2629                        write_group, use_dm, *bit_chk);
2630                 debug_cond(DLEVEL == 2, "%lu => %lu", (long unsigned int)0,
2631                         (long unsigned int)(*bit_chk != 0));
2632                 return *bit_chk != 0x00;
2633         }
2634 }
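
/*
 * Example invocation (as in the centering code below), requiring only a
 * single passing bit on the current rank set:
 *
 *      stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0,
 *                                              PASS_ONE_BIT, &bit_chk, 0);
 */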
2635
2636 /*
2637  * center all windows. do per-bit-deskew to possibly increase size of
2638  * certain windows.
2639  */
2640 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
2641         uint32_t write_group, uint32_t test_bgn)
2642 {
2643         uint32_t i, p, min_index;
2644         int32_t d;
2645         uint32_t bit_chk;
2646         uint32_t sticky_bit_chk;
2647         /*
2648          * Store the edges as signed, since they are compared against
2649          * signed numbers.
2650          */
2651         int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2652         int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2653         int32_t mid;
2654         int32_t mid_min, orig_mid_min;
2655         int32_t new_dqs, start_dqs, shift_dq;
2656         int32_t dq_margin, dqs_margin, dm_margin;
2657         uint32_t stop;
2658         uint32_t temp_dq_out1_delay;
2659         uint32_t addr;
2660
2661         debug("%s:%d %u %u\n", __func__, __LINE__, write_group, test_bgn);
2662
2663         dm_margin = 0;
2664
2665         addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2666         start_dqs = readl(addr +
2667                           (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
2668
2669         /* per-bit deskew */
2670
2671         /*
2672          * set the left and right edge of each bit to an illegal value
2673          * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value.
2674          */
2675         sticky_bit_chk = 0;
2676         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2677                 left_edge[i]  = IO_IO_OUT1_DELAY_MAX + 1;
2678                 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2679         }
2680
2681         /* Search for the left edge of the window for each bit */
2682         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) {
2683                 scc_mgr_apply_group_dq_out1_delay(write_group, d);
2684
2685                 writel(0, &sdr_scc_mgr->update);
2686
2687                 /*
2688                  * Stop searching when the write test doesn't pass AND when
2689                  * we've seen a passing write on every bit.
2690                  */
2691                 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2692                         0, PASS_ONE_BIT, &bit_chk, 0);
2693                 sticky_bit_chk = sticky_bit_chk | bit_chk;
2694                 stop = stop && (sticky_bit_chk == param->write_correct_mask);
2695                 debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u "
2696                            "== %u && %u [bit_chk=%u]\n",
2697                         d, sticky_bit_chk, param->write_correct_mask,
2698                         stop, bit_chk);
2699
2700                 if (stop == 1) {
2701                         break;
2702                 } else {
2703                         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2704                                 if (bit_chk & 1) {
2705                                         /*
2706                                          * Remember a passing test as the
2707                                          * left_edge.
2708                                          */
2709                                         left_edge[i] = d;
2710                                 } else {
2711                                         /*
2712                                          * If a left edge has not been seen
2713                                          * yet, then a future passing test will
2714                                          * mark this edge as the right edge.
2715                                          */
2716                                         if (left_edge[i] ==
2717                                                 IO_IO_OUT1_DELAY_MAX + 1) {
2718                                                 right_edge[i] = -(d + 1);
2719                                         }
2720                                 }
2721                                 debug_cond(DLEVEL == 2, "write_center[l,d=%d]:", d);
2722                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2723                                            (int)(bit_chk & 1), i, left_edge[i]);
2724                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2725                                        right_edge[i]);
2726                                 bit_chk = bit_chk >> 1;
2727                         }
2728                 }
2729         }
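
        /*
         * At this point left_edge[i] holds the largest DQ output delay (in
         * dtaps) at which bit i still passed, i.e. its left margin, while
         * IO_IO_OUT1_DELAY_MAX + 1 still marks "no edge found".
         */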
2730
2731         /* Reset DQ delay chains to 0 */
2732         scc_mgr_apply_group_dq_out1_delay(0);
2733         sticky_bit_chk = 0;
2734         for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) {
2735                 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: "
2736                            "%d right_edge[%u]: %d\n", __func__, __LINE__,
2737                            i, left_edge[i], i, right_edge[i]);
2738
2739                 /*
2740                  * Check for cases where we haven't found the left edge,
2741                  * which makes our assignment of the right edge invalid.
2742                  * Reset it to the illegal value.
2743                  */
2744                 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) &&
2745                     (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) {
2746                         right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2747                         debug_cond(DLEVEL == 2, "%s:%d write_center: reset "
2748                                    "right_edge[%u]: %d\n", __func__, __LINE__,
2749                                    i, right_edge[i]);
2750                 }
2751
2752                 /*
2753                  * Reset sticky bit (except for bits where we have
2754                  * seen the left edge).
2755                  */
2756                 sticky_bit_chk = sticky_bit_chk << 1;
2757                 if (left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)
2758                         sticky_bit_chk = sticky_bit_chk | 1;
2759
2760                 if (i == 0)
2761                         break;
2762         }
2763
2764         /* Search for the right edge of the window for each bit */
2765         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) {
2766                 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2767                                                         d + start_dqs);
2768
2769                 writel(0, &sdr_scc_mgr->update);
2770
2771                 /*
2772                  * Stop searching when the write test doesn't pass AND when
2773                  * we've seen a passing write on every bit.
2774                  */
2775                 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2776                         0, PASS_ONE_BIT, &bit_chk, 0);
2777
2778                 sticky_bit_chk = sticky_bit_chk | bit_chk;
2779                 stop = stop && (sticky_bit_chk == param->write_correct_mask);
2780
2781                 debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => "
2782                            "%u == %u && %u\n", d, sticky_bit_chk,
2783                            param->write_correct_mask, stop);
2784
2785                 if (stop == 1) {
2786                         if (d == 0) {
2787                                 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS;
2788                                         i++) {
2789                                         /* d = 0 failed, but it passed when
2790                                         testing the left edge, so it must be
2791                                         marginal, set it to -1 */
2792                                         if (right_edge[i] ==
2793                                                 IO_IO_OUT1_DELAY_MAX + 1 &&
2794                                                 left_edge[i] !=
2795                                                 IO_IO_OUT1_DELAY_MAX + 1) {
2796                                                 right_edge[i] = -1;
2797                                         }
2798                                 }
2799                         }
2800                         break;
2801                 } else {
2802                         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2803                                 if (bit_chk & 1) {
2804                                         /*
2805                                          * Remember a passing test as
2806                                          * the right_edge.
2807                                          */
2808                                         right_edge[i] = d;
2809                                 } else {
2810                                         if (d != 0) {
2811                                                 /*
2812                                                  * If a right edge has not
2813                                                  * been seen yet, then a future
2814                                                  * passing test will mark this
2815                                                  * edge as the left edge.
2816                                                  */
2817                                                 if (right_edge[i] ==
2818                                                     IO_IO_OUT1_DELAY_MAX + 1)
2819                                                         left_edge[i] = -(d + 1);
2820                                         } else {
2821                                                 /*
2822                                                  * d = 0 failed, but it passed
2823                                                  * when testing the left edge,
2824                                                  * so it must be marginal, set
2825                                                  * it to -1.
2826                                                  */
2827                                                 if (right_edge[i] ==
2828                                                     IO_IO_OUT1_DELAY_MAX + 1 &&
2829                                                     left_edge[i] !=
2830                                                     IO_IO_OUT1_DELAY_MAX + 1)
2831                                                         right_edge[i] = -1;
2832                                                 /*
2833                                                  * If a right edge has not been
2834                                                  * seen yet, then a future
2835                                                  * passing test will mark this
2836                                                  * edge as the left edge.
2837                                                  */
2838                                                 else if (right_edge[i] ==
2839                                                         IO_IO_OUT1_DELAY_MAX +
2840                                                         1)
2841                                                         left_edge[i] = -(d + 1);
2842                                         }
2843                                 }
2844                                 debug_cond(DLEVEL == 2, "write_center[r,d=%d]:", d);
2845                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2846                                            (int)(bit_chk & 1), i, left_edge[i]);
2847                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2848                                            right_edge[i]);
2849                                 bit_chk = bit_chk >> 1;
2850                         }
2851                 }
2852         }
2853
2854         /* Check that all bits have a window */
2855         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2856                 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: "
2857                            "%d right_edge[%u]: %d", __func__, __LINE__,
2858                            i, left_edge[i], i, right_edge[i]);
2859                 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) ||
2860                     (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) {
2861                         set_failing_group_stage(test_bgn + i,
2862                                                 CAL_STAGE_WRITES,
2863                                                 CAL_SUBSTAGE_WRITES_CENTER);
2864                         return 0;
2865                 }
2866         }
2867
2868         /* Find middle of window for each DQ bit */
2869         mid_min = left_edge[0] - right_edge[0];
2870         min_index = 0;
2871         for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2872                 mid = left_edge[i] - right_edge[i];
2873                 if (mid < mid_min) {
2874                         mid_min = mid;
2875                         min_index = i;
2876                 }
2877         }
2878
2879         /*
2880          * -mid_min/2 represents the amount that we need to move DQS.
2881          * If mid_min is odd and positive we'll need to add one to
2882          * make sure the rounding in further calculations is correct
2883          * (always bias to the right), so just add 1 for all positive values.
2884          */
2885         if (mid_min > 0)
2886                 mid_min++;
2887         mid_min = mid_min / 2;
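        /* E.g. mid_min == 5 is first bumped to 6 and then halves to 3. */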
2888         debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__,
2889                    __LINE__, mid_min);
2890
2891         /* Determine the amount we can change DQS (which is -mid_min) */
2892         orig_mid_min = mid_min;
2893         new_dqs = start_dqs;
2894         mid_min = 0;
2895         debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d "
2896                    "mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min);
2897         /* Initialize data for export structures */
2898         dqs_margin = IO_IO_OUT1_DELAY_MAX + 1;
2899         dq_margin  = IO_IO_OUT1_DELAY_MAX + 1;
2900
2901         /* add delay to bring centre of all DQ windows to the same "level" */
2902         for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
2903                 /* Use values before divide by 2 to reduce round off error */
2904                 shift_dq = (left_edge[i] - right_edge[i] -
2905                         (left_edge[min_index] - right_edge[min_index]))/2  +
2906                 (orig_mid_min - mid_min);
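                /*
                 * Worked example with hypothetical edges: for
                 * left_edge[i] == 10, right_edge[i] == 8 and a narrowest
                 * window of left_edge[min_index] == right_edge[min_index]
                 * == 6 (so orig_mid_min == 0), shift_dq == (2 - 0) / 2 == 1:
                 * one extra dtap centres bit i at 9/9 margins.
                 */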
2907
2908                 debug_cond(DLEVEL == 2, "%s:%d write_center: before: "
2909                            "shift_dq[%u]=%d\n", __func__, __LINE__, i, shift_dq);
2910
2911                 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2912                 temp_dq_out1_delay = readl(addr + (i << 2));
2913                 if (shift_dq + (int32_t)temp_dq_out1_delay >
2914                         (int32_t)IO_IO_OUT1_DELAY_MAX) {
2915                         shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay;
2916                 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) {
2917                         shift_dq = -(int32_t)temp_dq_out1_delay;
2918                 }
2919                 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n",
2920                            i, shift_dq);
2921                 scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq);
2922                 scc_mgr_load_dq(i);
2923
2924                 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i,
2925                            left_edge[i] - shift_dq + (-mid_min),
2926                            right_edge[i] + shift_dq - (-mid_min));
2927                 /* To determine values for export structures */
2928                 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2929                         dq_margin = left_edge[i] - shift_dq + (-mid_min);
2930
2931                 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2932                         dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2933         }
2934
2935         /* Move DQS */
2936         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
2937         writel(0, &sdr_scc_mgr->update);
2938
2939         /* Centre DM */
2940         debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__);
2941
2942         /*
2943          * set the left and right edge of each bit to an illegal value,
2944          * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value,
2945          */
2946         left_edge[0]  = IO_IO_OUT1_DELAY_MAX + 1;
2947         right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1;
2948         int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2949         int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2950         int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1;
2951         int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1;
2952         int32_t win_best = 0;
2953
2954         /* Search for the (part of the) window while shifting DM delays. */
2955         for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) {
2956                 scc_mgr_apply_group_dm_out1_delay(d);
2957                 writel(0, &sdr_scc_mgr->update);
2958
2959                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2960                                                     PASS_ALL_BITS, &bit_chk,
2961                                                     0)) {
2962                         /* Set the current end of the window. */
2963                         end_curr = -d;
2964                         /*
2965                          * If a starting edge of our window has not been
2966                          * seen yet, this is the current start of the DM window.
2967                          */
2968                         if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
2969                                 bgn_curr = -d;
2970
2971                         /*
2972                          * If the current window is bigger than the best
2973                          * seen so far, record it as the best window.
2974                          */
2975                         if ((end_curr-bgn_curr+1) > win_best) {
2976                                 win_best = end_curr-bgn_curr+1;
2977                                 bgn_best = bgn_curr;
2978                                 end_best = end_curr;
2979                         }
2980                 } else {
2981                         /* We just saw a failing test. Reset temp edge */
2982                         bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2983                         end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2984                 }
2985         }
2986
2988         /* Reset DM delay chains to 0 */
2989         scc_mgr_apply_group_dm_out1_delay(0);
2990
2991         /*
2992          * Check to see if the current window nudges up against 0 delay.
2993          * If so, we need to continue the search by shifting DQS;
2994          * otherwise the DQS search begins as a new search. */
2995         if (end_curr != 0) {
2996                 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2997                 end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2998         }
2999
3000         /* Search for the (part of the) window while shifting DQS. */
3001         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) {
3002                 /*
3003                  * Note: this only shifts DQS, so we may be limiting
3004                  * ourselves to the DQ window width unnecessarily.
3005                  */
3006                 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
3007                                                         d + new_dqs);
3008
3009                 writel(0, &sdr_scc_mgr->update);
3010                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
3011                                                     PASS_ALL_BITS, &bit_chk,
3012                                                     0)) {
3013                         /* Set the current end of the window. */
3014                         end_curr = d;
3015                         /*
3016                          * If a beginning edge of our window has not been
3017                          * seen yet, this is the current beginning of the DM window.
3018                          */
3019                         if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
3020                                 bgn_curr = d;
3021
3022                         /*
3023                          * If the current window is bigger than the best
3024                          * seen so far, record it as the best window.
3025                          */
3026                         if ((end_curr-bgn_curr+1) > win_best) {
3027                                 win_best = end_curr-bgn_curr+1;
3028                                 bgn_best = bgn_curr;
3029                                 end_best = end_curr;
3030                         }
3031                 } else {
3032                         /* We just saw a failing test. Reset temp edge */
3033                         bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
3034                         end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3035
3036                         /*
3037                          * Early exit: if the remaining delay chain space
3038                          * is smaller than the largest window seen, stop.
3039                          */
3040                         if ((win_best - 1) >
3041                             (IO_IO_OUT1_DELAY_MAX - new_dqs - d))
3042                                 break;
3043                 }
3044         }
3045
3046         /* Assign the left and right edges for calibration and reporting. */
3047         left_edge[0] = -bgn_best;
3048         right_edge[0] = end_best;
3049
3050         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__,
3051                    __LINE__, left_edge[0], right_edge[0]);
3052
3053         /* Move DQS (back to orig) */
3054         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3055
3056         /* Move DM */
3057
3058         /* Find middle of window for the DM bit */
3059         mid = (left_edge[0] - right_edge[0]) / 2;
3060
3061         /* only move right, since we are not moving DQS/DQ */
3062         if (mid < 0)
3063                 mid = 0;
3064
3065         /* dm_margin should fail if we never find a window. */
3066         if (win_best == 0)
3067                 dm_margin = -1;
3068         else
3069                 dm_margin = left_edge[0] - mid;
3070
3071         scc_mgr_apply_group_dm_out1_delay(mid);
3072         writel(0, &sdr_scc_mgr->update);
3073
3074         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d "
3075                    "dm_margin=%d\n", __func__, __LINE__, left_edge[0],
3076                    right_edge[0], mid, dm_margin);
3077         /* Export values */
3078         gbl->fom_out += dq_margin + dqs_margin;
3079
3080         debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d "
3081                    "dqs_margin=%d dm_margin=%d\n", __func__, __LINE__,
3082                    dq_margin, dqs_margin, dm_margin);
3083
3084         /*
3085          * Do not remove this line as it makes sure all of our
3086          * decisions have been applied.
3087          */
3088         writel(0, &sdr_scc_mgr->update);
3089         return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0);
3090 }
3091
3092 /* calibrate the write operations */
3093 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g,
3094         uint32_t test_bgn)
3095 {
3096         /* update info for sims */
3097         debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn);
3098
3099         reg_file_set_stage(CAL_STAGE_WRITES);
3100         reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3101
3102         reg_file_set_group(g);
3103
3104         if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) {
3105                 set_failing_group_stage(g, CAL_STAGE_WRITES,
3106                                         CAL_SUBSTAGE_WRITES_CENTER);
3107                 return 0;
3108         }
3109
3110         return 1;
3111 }
3112
3113 /* precharge all banks and activate row 0 in bank "000..." and bank "111..." */
3114 static void mem_precharge_and_activate(void)
3115 {
3116         uint32_t r;
3117
3118         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
3119                 if (param->skip_ranks[r]) {
3120                         /* request to skip the rank */
3121                         continue;
3122                 }
3123
3124                 /* set rank */
3125                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
3126
3127                 /* precharge all banks ... */
3128                 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3129                                              RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3130
3131                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3132                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1,
3133                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3134
3135                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3136                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2,
3137                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3138
3139                 /* activate rows */
3140                 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3141                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3142         }
3143 }
3144
3145 /* Configure various memory related parameters. */
3146 static void mem_config(void)
3147 {
3148         uint32_t rlat, wlat;
3149         uint32_t rw_wl_nop_cycles;
3150         uint32_t max_latency;
3151
3152         debug("%s:%d\n", __func__, __LINE__);
3153         /* read in write and read latency */
3154         wlat = readl(&data_mgr->t_wl_add);
3155         wlat += readl(&data_mgr->mem_t_add);
3156
3157         /* WL for hard phy does not include additive latency */
3158
3159         /*
3160          * Add additional write latency to offset the address/command extra
3161          * clock cycle. We change the AC mux setting, causing AC to be delayed
3162          * by one mem clock cycle. Only do this for DDR3.
3163          */
3164         wlat = wlat + 1;
3165
3166         rlat = readl(&data_mgr->t_rl_add);
3167
3168         rw_wl_nop_cycles = wlat - 2;
3169         gbl->rw_wl_nop_cycles = rw_wl_nop_cycles;
3170
3171         /*
3172          * For AV/CV, lfifo is hardened and always runs at full rate so
3173          * max latency in AFI clocks, used here, is correspondingly smaller.
3174          */
3175         max_latency = (1 << MAX_LATENCY_COUNT_WIDTH) - 1;
3176         /* configure for a burst length of 8 */
3177
3178         /* write latency */
3179         /* Adjust Write Latency for Hard PHY */
3180         wlat = wlat + 1;
3181
3182         /* set a pretty high read latency initially */
3183         gbl->curr_read_lat = rlat + 16;
3184
3185         if (gbl->curr_read_lat > max_latency)
3186                 gbl->curr_read_lat = max_latency;
3187
3188         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3189
3190         /* advertise write latency */
3191         gbl->curr_write_lat = wlat;
3192         writel(wlat - 2, &phy_mgr_cfg->afi_wlat);
3193
3194         /* initialize bit slips */
3195         mem_precharge_and_activate();
3196 }
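
/*
 * Worked example with hypothetical register values: t_wl_add == 5 and
 * mem_t_add == 0 give wlat == 6 after the address/command adjustment, so
 * rw_wl_nop_cycles == 4; the hard-PHY adjustment then yields
 * curr_write_lat == 7 and afi_wlat == 5.
 */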
3197
3198 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */
3199 static void mem_skip_calibrate(void)
3200 {
3201         uint32_t vfifo_offset;
3202         uint32_t i, j, r;
3203
3204         debug("%s:%d\n", __func__, __LINE__);
3205         /* Need to update every shadow register set used by the interface */
3206         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
3207                 r += NUM_RANKS_PER_SHADOW_REG) {
3208                 /*
3209                  * Set output phase alignment settings appropriate for
3210                  * skip calibration.
3211                  */
3212                 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3213                         scc_mgr_set_dqs_en_phase(i, 0);
3214 #if IO_DLL_CHAIN_LENGTH == 6
3215                         scc_mgr_set_dqdqs_output_phase(i, 6);
3216 #else
3217                         scc_mgr_set_dqdqs_output_phase(i, 7);
3218 #endif
3219                         /*
3220                          * Case:33398
3221                          *
3222                          * Write data arrives to the I/O two cycles before write
3223                          * latency is reached (720 deg).
3224                          *   -> due to bit-slip in a/c bus
3225                          *   -> to allow board skew where dqs is longer than ck
3226                          *      -> how often can this happen!?
3227                          *      -> can claim back some ptaps for high freq
3228                          *       support if we can relax this, but I digress...
3229                          *
3230                          * The write_clk leads mem_ck by 90 deg
3231                          * The minimum ptap of the OPA is 180 deg
3232                          * Each ptap has (360 / IO_DLL_CHAIN_LENGTH) deg of delay
3233                          * The write_clk is always delayed by 2 ptaps
3234                          *
3235                          * Hence, to make DQS aligned to CK, we need to delay
3236                          * DQS by:
3237                          *    (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH))
3238                          *
3239                          * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH)
3240                          * gives us the number of ptaps, which simplifies to:
3241                          *
3242                          *    (1.25 * IO_DLL_CHAIN_LENGTH - 2)
3243                          */
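                        /*
                         * E.g. IO_DLL_CHAIN_LENGTH == 8 gives
                         * 1.25 * 8 - 2 == 8 ptaps; note this call replaces
                         * the #if-selected phase written above.
                         */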
3244                         scc_mgr_set_dqdqs_output_phase(i, (1.25 *
3245                                 IO_DLL_CHAIN_LENGTH - 2));
3246                 }
3247                 writel(0xff, &sdr_scc_mgr->dqs_ena);
3248                 writel(0xff, &sdr_scc_mgr->dqs_io_ena);
3249
3250                 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
3251                         writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3252                                   SCC_MGR_GROUP_COUNTER_OFFSET);
3253                 }
3254                 writel(0xff, &sdr_scc_mgr->dq_ena);
3255                 writel(0xff, &sdr_scc_mgr->dm_ena);
3256                 writel(0, &sdr_scc_mgr->update);
3257         }
3258
3259         /* Compensate for simulation model behaviour */
3260         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3261                 scc_mgr_set_dqs_bus_in_delay(i, 10);
3262                 scc_mgr_load_dqs(i);
3263         }
3264         writel(0, &sdr_scc_mgr->update);
3265
3266         /*
3267          * ArriaV has hard FIFOs that can only be initialized by incrementing
3268          * in sequencer.
3269          */
3270         vfifo_offset = CALIB_VFIFO_OFFSET;
3271         for (j = 0; j < vfifo_offset; j++) {
3272                 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3273         }
3274         writel(0, &phy_mgr_cmd->fifo_reset);
3275
3276         /*
3277          * For ACV with hard lfifo, we get the skip-cal setting from
3278          * generation-time constant.
3279          */
3280         gbl->curr_read_lat = CALIB_LFIFO_OFFSET;
3281         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3282 }
3283
3284 /* Memory calibration entry point */
3285 static uint32_t mem_calibrate(void)
3286 {
3287         uint32_t i;
3288         uint32_t rank_bgn, sr;
3289         uint32_t write_group, write_test_bgn;
3290         uint32_t read_group, read_test_bgn;
3291         uint32_t run_groups, current_run;
3292         uint32_t failing_groups = 0;
3293         uint32_t group_failed = 0;
3294         uint32_t sr_failed = 0;
3295
3296         debug("%s:%d\n", __func__, __LINE__);
3297         /* Initialize the data settings */
3298
3299         gbl->error_substage = CAL_SUBSTAGE_NIL;
3300         gbl->error_stage = CAL_STAGE_NIL;
3301         gbl->error_group = 0xff;
3302         gbl->fom_in = 0;
3303         gbl->fom_out = 0;
3304
3305         mem_config();
3306
3307         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3308                 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3309                           SCC_MGR_GROUP_COUNTER_OFFSET);
3310                 /* Only needed once to set all groups, pins, DQ, DQS, DM. */
3311                 if (i == 0)
3312                         scc_mgr_set_hhp_extras();
3313
3314                 scc_set_bypass_mode(i);
3315         }
3316
3317         if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3318                 /*
3319                  * Set VFIFO and LFIFO to instant-on settings in skip
3320                  * calibration mode.
3321                  */
3322                 mem_skip_calibrate();
3323         } else {
3324                 for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3325                         /*
3326                          * Zero all delay chain/phase settings for all
3327                          * groups and all shadow register sets.
3328                          */
3329                         scc_mgr_zero_all();
3330
3331                         run_groups = ~param->skip_groups;
3332
3333                         for (write_group = 0, write_test_bgn = 0; write_group
3334                                 < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++,
3335                                 write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) {
3336                                 /* Initialize the group failure flag. */
3337                                 group_failed = 0;
3338
3339                                 current_run = run_groups & ((1 <<
3340                                         RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3341                                 run_groups = run_groups >>
3342                                         RW_MGR_NUM_DQS_PER_WRITE_GROUP;
3343
3344                                 if (current_run == 0)
3345                                         continue;
3346
3347                                 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3348                                                     SCC_MGR_GROUP_COUNTER_OFFSET);
3349                                 scc_mgr_zero_group(write_group, 0);
3350
3351                                 for (read_group = write_group *
3352                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
3353                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3354                                         read_test_bgn = 0;
3355                                         read_group < (write_group + 1) *
3356                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
3357                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
3358                                         group_failed == 0;
3359                                         read_group++, read_test_bgn +=
3360                                         RW_MGR_MEM_DQ_PER_READ_DQS) {
3361                                         /* Calibrate the VFIFO */
3362                                         if (!((STATIC_CALIB_STEPS) &
3363                                                 CALIB_SKIP_VFIFO)) {
3364                                                 if (!rw_mgr_mem_calibrate_vfifo
3365                                                         (read_group,
3366                                                         read_test_bgn)) {
3367                                                         group_failed = 1;
3368
3369                                                         if (!(gbl->
3370                                                         phy_debug_mode_flags &
3371                                                 PHY_DEBUG_SWEEP_ALL_GROUPS)) {
3372                                                                 return 0;
3373                                                         }
3374                                                 }
3375                                         }
3376                                 }
3377
3378                                 /* Calibrate the output side */
3379                                 if (group_failed == 0)  {
3380                                         for (rank_bgn = 0, sr = 0; rank_bgn
3381                                                 < RW_MGR_MEM_NUMBER_OF_RANKS;
3382                                                 rank_bgn +=
3383                                                 NUM_RANKS_PER_SHADOW_REG,
3384                                                 ++sr) {
3385                                                 sr_failed = 0;
3386                                                 if (!((STATIC_CALIB_STEPS) &
3387                                                 CALIB_SKIP_WRITES)) {
3388                                                         if ((STATIC_CALIB_STEPS)
3389                                                 & CALIB_SKIP_DELAY_SWEEPS) {
3390                                                 /* not needed in quick mode! */
3391                                                         } else {
3392                                                 /*
3393                                                  * Determine if this set of
3394                                                  * ranks should be skipped
3395                                                  * entirely.
3396                                                  */
3397                                         if (!param->skip_shadow_regs[sr]) {
3398                                                 if (!rw_mgr_mem_calibrate_writes
3399                                                 (rank_bgn, write_group,
3400                                                 write_test_bgn)) {
3401                                                         sr_failed = 1;
3402                                                         if (!(gbl->
3403                                                         phy_debug_mode_flags &
3404                                                 PHY_DEBUG_SWEEP_ALL_GROUPS)) {
3405                                                                 return 0;
3406                                                                         }
3407                                                                         }
3408                                                                 }
3409                                                         }
3410                                                 }
3411                                                 if (sr_failed != 0)
3412                                                         group_failed = 1;
3413                                         }
3414                                 }
3415
3416                                 if (group_failed == 0) {
3417                                         for (read_group = write_group *
3418                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
3419                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3420                                         read_test_bgn = 0;
3421                                                 read_group < (write_group + 1)
3422                                                 * RW_MGR_MEM_IF_READ_DQS_WIDTH
3423                                                 / RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
3424                                                 group_failed == 0;
3425                                                 read_group++, read_test_bgn +=
3426                                                 RW_MGR_MEM_DQ_PER_READ_DQS) {
3427                                                 if (!((STATIC_CALIB_STEPS) &
3428                                                         CALIB_SKIP_WRITES)) {
3429                                         if (!rw_mgr_mem_calibrate_vfifo_end
3430                                                 (read_group, read_test_bgn)) {
3431                                                         group_failed = 1;
3432
3433                                                 if (!(gbl->phy_debug_mode_flags
3434                                                 & PHY_DEBUG_SWEEP_ALL_GROUPS)) {
3435                                                                 return 0;
3436                                                                 }
3437                                                         }
3438                                                 }
3439                                         }
3440                                 }
3441
3442                                 if (group_failed != 0)
3443                                         failing_groups++;
3444                         }
3445
3446                         /*
3447                          * If there are any failing groups then report
3448                          * the failure.
3449                          */
3450                         if (failing_groups != 0)
3451                                 return 0;
3452
3453                         /* Calibrate the LFIFO */
3454                         if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) {
3455                                 /*
3456                                  * If we're skipping groups as part of debug,
3457                                  * don't calibrate LFIFO.
3458                                  */
3459                                 if (param->skip_groups == 0) {
3460                                         if (!rw_mgr_mem_calibrate_lfifo())
3461                                                 return 0;
3462                                 }
3463                         }
3464                 }
3465         }
3466
3467         /*
3468          * Do not remove this line as it makes sure all of our decisions
3469          * have been applied.
3470          */
3471         writel(0, &sdr_scc_mgr->update);
3472         return 1;
3473 }
3474
3475 static uint32_t run_mem_calibrate(void)
3476 {
3477         uint32_t pass;
3478         uint32_t debug_info;
3479
3480         debug("%s:%d\n", __func__, __LINE__);
3481
3482         /* Reset pass/fail status shown on afi_cal_success/fail */
3483         writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3484
3485         /* Stop the tracking manager (clear ctrl_cfg bit 22). */
3486         uint32_t ctrlcfg = readl(&sdr_ctrl->ctrl_cfg);
3487
3488         writel(ctrlcfg & 0xFFBFFFFF, &sdr_ctrl->ctrl_cfg);
3489
3490         initialize();
3491         rw_mgr_mem_initialize();
3492
3493         pass = mem_calibrate();
3494
3495         mem_precharge_and_activate();
3496         writel(0, &phy_mgr_cmd->fifo_reset);
3497
3498         /*
3499          * Handoff:
3500          * Don't return control of the PHY back to AFI when in debug mode.
3501          */
3502         if ((gbl->phy_debug_mode_flags & PHY_DEBUG_IN_DEBUG_MODE) == 0) {
3503                 rw_mgr_mem_handoff();
3504                 /*
3505                  * In Hard PHY this is a 2-bit control:
3506                  * 0: AFI Mux Select
3507                  * 1: DDIO Mux Select
3508                  */
3509                 writel(0x2, &phy_mgr_cfg->mux_sel);
3510         }
3511
3512         writel(ctrlcfg, &sdr_ctrl->ctrl_cfg);
3513
3514         if (pass) {
3515                 printf("%s: CALIBRATION PASSED\n", __FILE__);
3516
3517                 gbl->fom_in /= 2;
3518                 gbl->fom_out /= 2;
3519
3520                 if (gbl->fom_in > 0xff)
3521                         gbl->fom_in = 0xff;
3522
3523                 if (gbl->fom_out > 0xff)
3524                         gbl->fom_out = 0xff;
3525
3526                 /* Update the FOM in the register file */
3527                 debug_info = gbl->fom_in;
3528                 debug_info |= gbl->fom_out << 8;
3529                 writel(debug_info, &sdr_reg_file->fom);
3530
3531                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3532                 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3533         } else {
3534                 printf("%s: CALIBRATION FAILED\n", __FILE__);
3535
3536                 /* Update the failing group/stage in the register file */
3537                 debug_info = gbl->error_stage;
3538                 debug_info |= gbl->error_substage << 8;
3539                 debug_info |= gbl->error_group << 16;
3540
3541                 writel(debug_info, &sdr_reg_file->failing_stage);
3542                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3543                 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3549         }
3550
3551         return pass;
3552 }
3553
3554 /**
3555  * hc_initialize_rom_data() - Initialize ROM data
3556  *
3557  * Initialize ROM data.
3558  */
3559 static void hc_initialize_rom_data(void)
3560 {
3561         u32 i, addr;
3562
3563         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
3564         for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++)
3565                 writel(inst_rom_init[i], addr + (i << 2));
3566
3567         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3568         for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++)
3569                 writel(ac_rom_init[i], addr + (i << 2));
3570 }
3571
3572 /**
3573  * initialize_reg_file() - Initialize SDR register file
3574  *
3575  * Initialize SDR register file.
3576  */
3577 static void initialize_reg_file(void)
3578 {
3579         /* Initialize the register file with the correct data */
3580         writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature);
3581         writel(0, &sdr_reg_file->debug_data_addr);
3582         writel(0, &sdr_reg_file->cur_stage);
3583         writel(0, &sdr_reg_file->fom);
3584         writel(0, &sdr_reg_file->failing_stage);
3585         writel(0, &sdr_reg_file->debug1);
3586         writel(0, &sdr_reg_file->debug2);
3587 }
3588
3589 /**
3590  * initialize_hps_phy() - Initialize HPS PHY
3591  *
3592  * Initialize HPS PHY.
3593  */
3594 static void initialize_hps_phy(void)
3595 {
3596         uint32_t reg;
3597         /*
3598          * Tracking also gets configured here because it's in the
3599          * same register.
3600          */
3601         uint32_t trk_sample_count = 7500;
3602         /*
3603          * Format is number of outer loops in the 16 MSB, sample
3604          * count in the 16 LSB.
3605          */
3606         uint32_t trk_long_idle_sample_count = (10 << 16) | 100;
3607
3608         reg = 0;
3609         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3610         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3611         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3612         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3613         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3614         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3615         /*
3616          * This field selects the intrinsic latency to RDATA_EN/FULL path.
3617          * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3618          */
3619         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3620         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3621                 trk_sample_count);
3622         writel(reg, &sdr_ctrl->phy_ctrl0);
3623
3624         reg = 0;
3625         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3626                 trk_sample_count >>
3627                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3628         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3629                 trk_long_idle_sample_count);
3630         writel(reg, &sdr_ctrl->phy_ctrl1);
3631
3632         reg = 0;
3633         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3634                 trk_long_idle_sample_count >>
3635                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3636         writel(reg, &sdr_ctrl->phy_ctrl2);
3637 }
3638
3639 static void initialize_tracking(void)
3640 {
3641         uint32_t concatenated_longidle = 0x0;
3642         uint32_t concatenated_delays = 0x0;
3643         uint32_t concatenated_rw_addr = 0x0;
3644         uint32_t concatenated_refresh = 0x0;
3645         uint32_t trk_sample_count = 7500;
3646         uint32_t dtaps_per_ptap;
3647         uint32_t tmp_delay;
3648
3649         /*
3650          * compute usable version of value in case we skip full
3651          * computation later
3652          */
3653         dtaps_per_ptap = 0;
3654         tmp_delay = 0;
3655         while (tmp_delay < IO_DELAY_PER_OPA_TAP) {
3656                 dtaps_per_ptap++;
3657                 tmp_delay += IO_DELAY_PER_DCHAIN_TAP;
3658         }
3659         dtaps_per_ptap--;
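        /*
         * E.g. with hypothetical IO_DELAY_PER_OPA_TAP == 416 and
         * IO_DELAY_PER_DCHAIN_TAP == 25 (both in ps), the loop exits with
         * dtaps_per_ptap == 17 and the decrement leaves 16 whole
         * delay-chain taps per phase tap.
         */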
3660
3661         concatenated_longidle = concatenated_longidle ^ 10;
3662                 /* longidle outer loop */
3663         concatenated_longidle = concatenated_longidle << 16;
3664         concatenated_longidle = concatenated_longidle ^ 100;
3665                 /* longidle sample count */
3666         concatenated_delays = concatenated_delays ^ 243;
3667                 /* trfc, worst case of 933MHz 4Gb */
3668         concatenated_delays = concatenated_delays << 8;
3669         concatenated_delays = concatenated_delays ^ 14;
3670                 /* trcd, worst case */
3671         concatenated_delays = concatenated_delays << 8;
3672         concatenated_delays = concatenated_delays ^ 10;
3673                 /* vfifo wait */
3674         concatenated_delays = concatenated_delays << 8;
3675         concatenated_delays = concatenated_delays ^ 4;
3676                 /* mux delay */
3677
3678         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_IDLE;
3679         concatenated_rw_addr = concatenated_rw_addr << 8;
3680         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_ACTIVATE_1;
3681         concatenated_rw_addr = concatenated_rw_addr << 8;
3682         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_SGLE_READ;
3683         concatenated_rw_addr = concatenated_rw_addr << 8;
3684         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_PRECHARGE_ALL;
3685
3686         concatenated_refresh = concatenated_refresh ^ RW_MGR_REFRESH_ALL;
3687         concatenated_refresh = concatenated_refresh << 24;
3688         concatenated_refresh = concatenated_refresh ^ 1000; /* trefi */
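        /*
         * Since the fields do not overlap, the XOR/shift sequence above is
         * equivalent to (RW_MGR_REFRESH_ALL << 24) | 1000, assuming the
         * instruction index fits in 8 bits.
         */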
3689
3690         /* Initialize the register file with the correct data */
3691         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
3692         writel(trk_sample_count, &sdr_reg_file->trk_sample_count);
3693         writel(concatenated_longidle, &sdr_reg_file->trk_longidle);
3694         writel(concatenated_delays, &sdr_reg_file->delays);
3695         writel(concatenated_rw_addr, &sdr_reg_file->trk_rw_mgr_addr);
3696         writel(RW_MGR_MEM_IF_READ_DQS_WIDTH, &sdr_reg_file->trk_read_dqs_width);
3697         writel(concatenated_refresh, &sdr_reg_file->trk_rfsh);
3698 }
3699
3700 int sdram_calibration_full(void)
3701 {
3702         struct param_type my_param;
3703         struct gbl_type my_gbl;
3704         uint32_t pass;
3705         uint32_t i;
3706
3707         param = &my_param;
3708         gbl = &my_gbl;
3709
3710         /* Initialize the debug mode flags */
3711         gbl->phy_debug_mode_flags = 0;
3712         /* Set the calibration enabled by default */
3713         gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3714         /*
3715          * Only sweep all groups (regardless of fail state) by default.
3716          * The read test is enabled by default.
3717          */
3718 #if DISABLE_GUARANTEED_READ
3719         gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3720 #endif
3721         /* Initialize the register file */
3722         initialize_reg_file();
3723
3724         /* Initialize any PHY CSR */
3725         initialize_hps_phy();
3726
3727         scc_mgr_initialize();
3728
3729         initialize_tracking();
3730
3731         /* Enable all ranks and groups */
3732         for (i = 0; i < RW_MGR_MEM_NUMBER_OF_RANKS; i++)
3733                 param->skip_ranks[i] = 0;
3734         for (i = 0; i < NUM_SHADOW_REGS; ++i)
3735                 param->skip_shadow_regs[i] = 0;
3736         param->skip_groups = 0;
3737
3738         printf("%s: Preparing to start memory calibration\n", __FILE__);
3739
3740         debug("%s:%d\n", __func__, __LINE__);
3741         debug_cond(DLEVEL == 1,
3742                    "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3743                    RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM,
3744                    RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS,
3745                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS,
3746                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
3747         debug_cond(DLEVEL == 1,
3748                    "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3749                    RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3750                    RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH,
3751                    IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP);
3752         debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u",
3753                    IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH);
3754         debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3755                    IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX,
3756                    IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX);
3757         debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3758                    IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX,
3759                    IO_IO_OUT2_DELAY_MAX);
3760         debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3761                    IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE);
3762
3763         hc_initialize_rom_data();
3764
3765         /* update info for sims */
3766         reg_file_set_stage(CAL_STAGE_NIL);
3767         reg_file_set_group(0);
3768
3769         /*
3770          * Load global needed for those actions that require
3771          * some dynamic calibration support.
3772          */
3773         dyn_calib_steps = STATIC_CALIB_STEPS;
3774         /*
3775          * Load global to allow dynamic selection of delay loop settings
3776          * based on calibration mode.
3777          */
3778         if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3779                 skip_delay_mask = 0xff;
3780         else
3781                 skip_delay_mask = 0x0;
3782
3783         pass = run_mem_calibrate();
3784
3785         printf("%s: Calibration complete\n", __FILE__);
3786         return pass;
3787 }