summary | refs | log | tree | commit | diff | stats
path: root/resources/libreboot/patch/kgpe-d16/0088-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch
diff options
context:
space:
mode:
Diffstat (limited to 'resources/libreboot/patch/kgpe-d16/0088-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch')
-rw-r--r-- resources/libreboot/patch/kgpe-d16/0088-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch 1887
1 files changed, 1887 insertions, 0 deletions
diff --git a/resources/libreboot/patch/kgpe-d16/0088-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch b/resources/libreboot/patch/kgpe-d16/0088-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch
new file mode 100644
index 0000000..3e708f5
--- /dev/null
+++ b/resources/libreboot/patch/kgpe-d16/0088-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch
@@ -0,0 +1,1887 @@
+From f30d6c75652c95152eb0dbe6bf9da2198a780a84 Mon Sep 17 00:00:00 2001
+From: Timothy Pearson <tpearson@raptorengineeringinc.com>
+Date: Tue, 28 Jul 2015 15:16:46 -0500
+Subject: [PATCH 088/143] northbridge/amd/amdmct/mct_ddr3: Add registered and
+ x4 DIMM support to Fam15h
+
+Change-Id: I9ee0bb7346aa35f564fe535cdd337ec7f6148f2b
+Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com>
+---
+ src/northbridge/amd/amdmct/mct_ddr3/mct_d.c | 186 ++++++-----
+ src/northbridge/amd/amdmct/mct_ddr3/mct_d.h | 2 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c | 4 +
+ src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c | 17 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctrci.c | 191 +++++++----
+ src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c | 42 ++-
+ src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 253 ++++++++-------
+ src/northbridge/amd/amdmct/mct_ddr3/mctwl.c | 16 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c | 400 +++++++++++++++---------
+ src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h | 13 +-
+ 10 files changed, 698 insertions(+), 426 deletions(-)
+
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+index b29ff3c..1c9c568 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+@@ -166,7 +166,7 @@ static void mct_EnDllShutdownSR(struct MCTStatStruc *pMCTstat,
+ static void ChangeMemClk(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat);
++ struct DCTStatStruc *pDCTstatA, uint8_t Node);
+
+ static u32 mct_MR1Odt_RDimm(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel);
+@@ -1404,6 +1404,10 @@ static void precise_memclk_delay_fam15(struct MCTStatStruc *pMCTstat, struct DCT
+
+ memclk_freq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
+
++ if (fam15h_freq_tab[memclk_freq] == 0) {
++ printk(BIOS_DEBUG, "ERROR: precise_memclk_delay_fam15 for DCT %d (delay %d clocks) failed to obtain valid memory frequency!"
++ " (pDCTstat: %p pDCTstat->dev_dct: %08x memclk_freq: %02x)\n", dct, clocks, pDCTstat, pDCTstat->dev_dct, memclk_freq);
++ }
+ delay_ns = (((uint64_t)clocks * 1000) / fam15h_freq_tab[memclk_freq]);
+ precise_ndelay_fam15(pMCTstat, delay_ns);
+ }
+@@ -2320,7 +2324,7 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
+ nv_DQSTrainCTL = !allow_config_restore;
+
+ mct_BeforeDQSTrain_D(pMCTstat, pDCTstatA);
+- phyAssistedMemFnceTraining(pMCTstat, pDCTstatA);
++ phyAssistedMemFnceTraining(pMCTstat, pDCTstatA, -1);
+
+ if (is_fam15h()) {
+ uint8_t Node;
+@@ -3359,7 +3363,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
+ }
+
+ static u8 AutoCycTiming_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 dct)
++ struct DCTStatStruc *pDCTstat, uint8_t dct)
+ {
+ /* Initialize DCT Timing registers as per DIMM SPD.
+ * For primary timing (T, CL) use best case T value.
+@@ -3463,7 +3467,7 @@ static void GetPresetmaxF_D(struct MCTStatStruc *pMCTstat,
+ }
+
+ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 dct)
++ struct DCTStatStruc *pDCTstat, uint8_t dct)
+ {
+ /* Find the best T and CL primary timing parameter pair, per Mfg.,
+ * for the given set of DIMMs, and store into DCTStatStruc
+@@ -3742,10 +3746,15 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ dword++;
+ }
+
+- if (Status & (1 << SB_Registered))
+- DramConfigLo |= 1 << ParEn; /* Registered DIMMs */
+- else
+- DramConfigLo |= 1 << UnBuffDimm; /* Unbuffered DIMMs */
++ if (Status & (1 << SB_Registered)) {
++ /* Registered DIMMs */
++ if (!is_fam15h()) {
++ DramConfigLo |= 1 << ParEn;
++ }
++ } else {
++ /* Unbuffered DIMMs */
++ DramConfigLo |= 1 << UnBuffDimm;
++ }
+
+ if (mctGet_NVbits(NV_ECC_CAP))
+ if (Status & (1 << SB_ECCDIMMs))
+@@ -3763,10 +3772,11 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ DramConfigHi |= dword - offset; /* get MemClk encoding */
+ DramConfigHi |= 1 << MemClkFreqVal;
+
+- if (Status & (1 << SB_Registered))
+- if ((pDCTstat->Dimmx4Present != 0) && (pDCTstat->Dimmx8Present != 0))
+- /* set only if x8 Registered DIMMs in System*/
+- DramConfigHi |= 1 << RDqsEn;
++ if (!is_fam15h())
++ if (Status & (1 << SB_Registered))
++ if ((pDCTstat->Dimmx4Present != 0) && (pDCTstat->Dimmx8Present != 0))
++ /* set only if x8 Registered DIMMs in System*/
++ DramConfigHi |= 1 << RDqsEn;
+
+ if (pDCTstat->LogicalCPUID & AMD_FAM15_ALL) {
+ DramConfigLo |= 1 << 25; /* PendRefPaybackS3En = 1 */
+@@ -3778,14 +3788,16 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ DramConfigHi |= 1 << 16;
+ }
+
+- /* Control Bank Swizzle */
+- if (0) /* call back not needed mctBankSwizzleControl_D()) */
+- DramConfigHi &= ~(1 << BankSwizzleMode);
+- else
+- DramConfigHi |= 1 << BankSwizzleMode; /* recommended setting (default) */
++ if (!is_fam15h()) {
++ /* Control Bank Swizzle */
++ if (0) /* call back not needed mctBankSwizzleControl_D()) */
++ DramConfigHi &= ~(1 << BankSwizzleMode);
++ else
++ DramConfigHi |= 1 << BankSwizzleMode; /* recommended setting (default) */
++ }
+
+ /* Check for Quadrank DIMM presence */
+- if ( pDCTstat->DimmQRPresent != 0) {
++ if (pDCTstat->DimmQRPresent != 0) {
+ byte = mctGet_NVbits(NV_4RANKType);
+ if (byte == 2)
+ DramConfigHi |= 1 << 17; /* S4 (4-Rank SO-DIMMs) */
+@@ -4590,8 +4602,9 @@ static u8 mct_setMode(struct MCTStatStruc *pMCTstat,
+ Set_NB32(pDCTstat->dev_dct, reg, val);
+ }
+ if (byte) /* NV_Unganged */
+- pDCTstat->ErrStatus &= ~(1 << SB_DimmMismatchO); /* Clear so that there is no DIMM missmatch error */
++ pDCTstat->ErrStatus &= ~(1 << SB_DimmMismatchO); /* Clear so that there is no DIMM mismatch error */
+ }
++
+ return pDCTstat->ErrCode;
+ }
+
+@@ -4652,6 +4665,8 @@ void Set_NB32_index_wait(u32 dev, u32 index_reg, u32 index, u32 data)
+ static u8 mct_BeforePlatformSpec(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
++
+ /* mct_checkForCxDxSupport_D */
+ if (pDCTstat->LogicalCPUID & AMD_DR_GT_Bx) {
+ /* Family 10h Errata 322: Address and Command Fine Delay Values May Be Incorrect */
+@@ -4666,6 +4681,9 @@ static u8 mct_BeforePlatformSpec(struct MCTStatStruc *pMCTstat,
+ else
+ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, 0x98, 0x0D02E001, 0x90);
+ }
++
++ printk(BIOS_DEBUG, "%s: Done\n", __func__);
++
+ return pDCTstat->ErrCode;
+ }
+
+@@ -4676,6 +4694,8 @@ static u8 mct_PlatformSpec(struct MCTStatStruc *pMCTstat,
+ * and program them into DCT.
+ */
+
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
++
+ u32 dev = pDCTstat->dev_dct;
+ u32 index_reg;
+ u8 i, i_start, i_end;
+@@ -4696,6 +4716,8 @@ static u8 mct_PlatformSpec(struct MCTStatStruc *pMCTstat,
+ printk(BIOS_SPEW, "Programmed DCT %d timing/termination pattern %08x %08x\n", dct, pDCTstat->CH_ADDR_TMG[i], pDCTstat->CH_ODC_CTL[i]);
+ }
+
++ printk(BIOS_DEBUG, "%s: Done\n", __func__);
++
+ return pDCTstat->ErrCode;
+ }
+
+@@ -4707,7 +4729,8 @@ static void mct_SyncDCTsReady(struct DCTStatStruc *pDCTstat)
+ if (pDCTstat->NodePresent) {
+ dev = pDCTstat->dev_dct;
+
+- if ((pDCTstat->DIMMValidDCT[0] ) || (pDCTstat->DIMMValidDCT[1])) { /* This Node has dram */
++ if ((pDCTstat->DIMMValidDCT[0]) || (pDCTstat->DIMMValidDCT[1])) {
++ /* This Node has DRAM */
+ do {
+ val = Get_NB32(dev, 0x110);
+ } while (!(val & (1 << DramEnabled)));
+@@ -5655,57 +5678,56 @@ static void InitDDRPhy(struct MCTStatStruc *pMCTstat,
+ /* Fam15h BKDG v3.14 section 2.10.5.3
+ * The remainder of the Phy Initialization algorithm picks up in phyAssistedMemFnceTraining
+ */
+- for (dct = 0; dct < 2; dct++) {
+- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000b, 0x80000000);
+- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe013, 0x00000118);
+-
+- /* Program desired VDDIO level */
+- if (ddr_voltage_index & 0x4) {
+- /* 1.25V */
+- amd_voltage_level_index = 0x2;
+- } else if (ddr_voltage_index & 0x2) {
+- /* 1.35V */
+- amd_voltage_level_index = 0x1;
+- } else if (ddr_voltage_index & 0x1) {
+- /* 1.50V */
+- amd_voltage_level_index = 0x0;
+- }
+-
+- /* D18F2x9C_x0D0F_0[F,8:0]1F_dct[1:0][RxVioLvl] */
+- for (index = 0; index < 0x9; index++) {
+- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8));
+- dword &= ~(0x3 << 3);
+- dword |= (amd_voltage_level_index << 3);
+- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8), dword);
+- }
+-
+- /* D18F2x9C_x0D0F_[C,8,2][2:0]1F_dct[1:0][RxVioLvl] */
+- for (index = 0; index < 0x3; index++) {
+- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8));
+- dword &= ~(0x3 << 3);
+- dword |= (amd_voltage_level_index << 3);
+- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8), dword);
+- }
+- for (index = 0; index < 0x2; index++) {
+- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8));
+- dword &= ~(0x3 << 3);
+- dword |= (amd_voltage_level_index << 3);
+- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8), dword);
+- }
+- for (index = 0; index < 0x1; index++) {
+- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8));
+- dword &= ~(0x3 << 3);
+- dword |= (amd_voltage_level_index << 3);
+- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8), dword);
+- }
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000b, 0x80000000);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe013, 0x00000118);
+
+- /* D18F2x9C_x0D0F_4009_dct[1:0][CmpVioLvl, ComparatorAdjust] */
+- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f4009);
+- dword &= ~(0x0000c00c);
+- dword |= (amd_voltage_level_index << 14);
+- dword |= (amd_voltage_level_index << 2);
+- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f4009, dword);
+- }
++ /* Program desired VDDIO level */
++ if (ddr_voltage_index & 0x4) {
++ /* 1.25V */
++ amd_voltage_level_index = 0x2;
++ } else if (ddr_voltage_index & 0x2) {
++ /* 1.35V */
++ amd_voltage_level_index = 0x1;
++ } else if (ddr_voltage_index & 0x1) {
++ /* 1.50V */
++ amd_voltage_level_index = 0x0;
++ }
++
++ /* D18F2x9C_x0D0F_0[F,8:0]1F_dct[1:0][RxVioLvl] */
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8));
++ dword &= ~(0x3 << 3);
++ dword |= (amd_voltage_level_index << 3);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8), dword);
++ }
++
++ /* D18F2x9C_x0D0F_[C,8,2][2:0]1F_dct[1:0][RxVioLvl] */
++ for (index = 0; index < 0x3; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8));
++ dword &= ~(0x3 << 3);
++ dword |= (amd_voltage_level_index << 3);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8), dword);
++ }
++ for (index = 0; index < 0x2; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8));
++ dword &= ~(0x3 << 3);
++ dword |= (amd_voltage_level_index << 3);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8), dword);
++ }
++ for (index = 0; index < 0x1; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8));
++ dword &= ~(0x3 << 3);
++ dword |= (amd_voltage_level_index << 3);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8), dword);
++ }
++
++ /* D18F2x9C_x0D0F_4009_dct[1:0][CmpVioLvl, ComparatorAdjust] */
++ /* NOTE: CmpVioLvl and ComparatorAdjust only take effect when set on DCT 0 */
++ dword = Get_NB32_index_wait_DCT(dev, 0, index_reg, 0x0d0f4009);
++ dword &= ~(0x0000c00c);
++ dword |= (amd_voltage_level_index << 14);
++ dword |= (amd_voltage_level_index << 2);
++ Set_NB32_index_wait_DCT(dev, 0, index_reg, 0x0d0f4009, dword);
+
+ printk(BIOS_DEBUG, "%s: Done\n", __func__);
+ }
+@@ -5721,18 +5743,24 @@ static void InitPhyCompensation(struct MCTStatStruc *pMCTstat,
+ uint32_t dword;
+ const u8 *p;
+
+- printk(BIOS_DEBUG, "%s: Start\n", __func__);
++ printk(BIOS_DEBUG, "%s: DCT %d: Start\n", __func__, dct);
+
+ if (is_fam15h()) {
+ /* Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 */
+ uint32_t tx_pre;
+ uint32_t drive_strength;
+
+- /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp, DisablePredriverCal] */
++ /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp] */
+ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003);
+- dword |= (0x3 << 13);
++ dword |= (0x1 << 14);
+ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003, dword);
+
++ /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisablePredriverCal] */
++ /* NOTE: DisablePredriverCal only takes effect when set on DCT 0 */
++ dword = Get_NB32_index_wait_DCT(dev, 0, index_reg, 0x0d0fe003);
++ dword |= (0x1 << 13);
++ Set_NB32_index_wait_DCT(dev, 0, index_reg, 0x0d0fe003, dword);
++
+ /* Determine TxPreP/TxPreN for data lanes (Stage 1) */
+ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000);
+ drive_strength = (dword >> 20) & 0x7; /* DqsDrvStren */
+@@ -5878,12 +5906,14 @@ static void InitPhyCompensation(struct MCTStatStruc *pMCTstat,
+ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0a, dword);
+ }
+
+- printk(BIOS_DEBUG, "%s: Done\n", __func__);
++ printk(BIOS_DEBUG, "%s: DCT %d: Done\n", __func__, dct);
+ }
+
+ static void mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
++
+ if (!is_fam15h()) {
+ u32 reg;
+ u32 val;
+@@ -5905,6 +5935,8 @@ static void mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat,
+
+ Set_NB32_DCT(dev, dct, reg, val);
+ }
++
++ printk(BIOS_DEBUG, "%s: Done\n", __func__);
+ }
+
+ static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat,
+@@ -6548,6 +6580,8 @@ void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat,
+
+ uint32_t dword;
+
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
++
+ if (is_fam15h()) {
+ /* Initial setup for frequency change
+ * 9C_x0000_0004 must be configured before MemClkFreqVal is set
+@@ -6580,6 +6614,8 @@ void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat,
+ mct_Wait(100);
+ }
+
++ printk(BIOS_DEBUG, "mct_SetDramConfigHi_D: DramConfigHi: %08x\n", DramConfigHi);
++
+ /* Program the DRAM Configuration High register */
+ Set_NB32_DCT(dev, dct, 0x94, DramConfigHi);
+
+@@ -6595,6 +6631,8 @@ void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat,
+ dword |= 0x0000000f;
+ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006, dword);
+ }
++
++ printk(BIOS_DEBUG, "%s: Done\n", __func__);
+ }
+
+ static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat,
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+index e327d38..486b16c 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+@@ -1014,7 +1014,7 @@ void InterleaveNodes_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTs
+ void InterleaveChannels_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
+ void mct_BeforeDQSTrain_Samp_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+
+-void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
++void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, int16_t Node);
+ u8 mct_SaveRcvEnDly_D_1Pass(struct DCTStatStruc *pDCTstat, u8 pass);
+ u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct);
+ void mct_Wait(u32 cycles);
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+index 36e9858..c70fa6d 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+@@ -1588,6 +1588,7 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
+
+ for (dct = 0; dct < 2; dct++) {
+ /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp, DisablePredriverCal] */
++ /* NOTE: DisablePredriverCal only takes effect when set on DCT 0 */
+ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003);
+ dword &= ~(0x3 << 13);
+ dword |= (0x1 << 13);
+@@ -1627,6 +1628,9 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
+ rx_en_offset = (initial_phy_phase_delay[lane] + 0x10) % 0x40;
+
+ /* 2.10.5.8.3 (4) */
++#if DQS_TRAIN_DEBUG > 0
++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc_D_Fam15 Receiver %d lane %d initial phy delay %04x: iterating from %04x to %04x\n", Receiver, lane, initial_phy_phase_delay[lane], rx_en_offset, 0x3ff);
++#endif
+ for (current_phy_phase_delay[lane] = rx_en_offset; current_phy_phase_delay[lane] < 0x3ff; current_phy_phase_delay[lane] += ren_step) {
+ /* 2.10.5.8.3 (4 A) */
+ write_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg);
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+index 539cb0d..1b81d15 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+@@ -21,7 +21,7 @@
+ static uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
+ static uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t pass);
+ static uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
+ static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+@@ -133,7 +133,7 @@ static uint8_t PhyWLPass1(struct MCTStatStruc *pMCTstat,
+ }
+
+ static uint8_t PhyWLPass2(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 dct)
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t final)
+ {
+ u8 dimm;
+ u16 DIMMValid;
+@@ -187,12 +187,15 @@ static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq)
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.1
+ */
+ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, uint8_t Pass)
++ struct DCTStatStruc *pDCTstatA, uint8_t Node, uint8_t Pass)
+ {
+ uint8_t status;
+ uint8_t timeout;
+ uint16_t final_target_freq;
+
++ struct DCTStatStruc *pDCTstat;
++ pDCTstat = pDCTstatA + Node;
++
+ pDCTstat->C_MCTPtr = &(pDCTstat->s_C_MCTPtr);
+ pDCTstat->C_DCTPtr[0] = &(pDCTstat->s_C_DCTPtr[0]);
+ pDCTstat->C_DCTPtr[1] = &(pDCTstat->s_C_DCTPtr[1]);
+@@ -240,13 +243,13 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
+ pDCTstat->TargetFreq = fam15h_next_highest_memclk_freq(pDCTstat->Speed);
+ else
+ pDCTstat->TargetFreq = final_target_freq;
+- SetTargetFreq(pMCTstat, pDCTstat);
++ SetTargetFreq(pMCTstat, pDCTstatA, Node);
+ timeout = 0;
+ do {
+ status = 0;
+ timeout++;
+- status |= PhyWLPass2(pMCTstat, pDCTstat, 0);
+- status |= PhyWLPass2(pMCTstat, pDCTstat, 1);
++ status |= PhyWLPass2(pMCTstat, pDCTstat, 0, (pDCTstat->TargetFreq == final_target_freq));
++ status |= PhyWLPass2(pMCTstat, pDCTstat, 1, (pDCTstat->TargetFreq == final_target_freq));
+ if (status)
+ printk(BIOS_INFO,
+ "%s: Retrying write levelling due to invalid value(s) detected in last phase\n",
+@@ -290,7 +293,7 @@ void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
+ if (pDCTstat->NodePresent) {
+ mctSMBhub_Init(Node);
+ Clear_OnDimmMirror(pMCTstat, pDCTstat);
+- WriteLevelization_HW(pMCTstat, pDCTstat, Pass);
++ WriteLevelization_HW(pMCTstat, pDCTstatA, Node, Pass);
+ Restore_OnDimmMirror(pMCTstat, pDCTstat);
+ }
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c
+index 9617f84..624a543 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c
+@@ -18,6 +18,78 @@
+ * Foundation, Inc.
+ */
+
++static uint8_t fam15h_rdimm_rc2_control_code(struct DCTStatStruc *pDCTstat, uint8_t dct)
++{
++ uint8_t MaxDimmsInstallable = mctGet_NVbits(NV_MAX_DIMMS_PER_CH);
++
++ uint8_t package_type;
++ uint8_t control_code = 0;
++
++ package_type = mctGet_NVbits(NV_PACK_TYPE);
++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++
++ /* Obtain number of DIMMs on channel */
++ uint8_t dimm_count = pDCTstat->MAdimms[dct];
++
++ /* FIXME
++ * Assume there is only one register on the RDIMM for now
++ */
++ uint8_t num_registers = 1;
++
++ if (package_type == PT_GR) {
++ /* Socket G34 */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.7.1.2.1 Table 85 */
++ if (MaxDimmsInstallable == 1) {
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ control_code = 0x1;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (num_registers == 1) {
++ control_code = 0x0;
++ } else {
++ control_code = 0x1;
++ }
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ control_code = 0x0;
++ }
++ } else if (MaxDimmsInstallable == 2) {
++ if (dimm_count == 1) {
++ /* 1 DIMM detected */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ control_code = 0x1;
++ } else if ((MemClkFreq >= 0xa) && (MemClkFreq <= 0x12)) {
++ /* DDR3-1066 - DDR3-1600 */
++ if (num_registers == 1) {
++ control_code = 0x0;
++ } else {
++ control_code = 0x1;
++ }
++ }
++ } else if (dimm_count == 2) {
++ /* 2 DIMMs detected */
++ if (num_registers == 1) {
++ control_code = 0x1;
++ } else {
++ control_code = 0x8;
++ }
++ }
++ } else if (MaxDimmsInstallable == 3) {
++ /* TODO
++ * 3 DIMM/channel support unimplemented
++ */
++ }
++ } else {
++ /* TODO
++ * Other socket support unimplemented
++ */
++ }
++
++ return control_code;
++}
++
+ static uint16_t memclk_to_freq(uint16_t memclk) {
+ uint16_t fam10h_freq_tab[] = {0, 0, 0, 400, 533, 667, 800};
+ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
+@@ -37,36 +109,46 @@ static uint16_t memclk_to_freq(uint16_t memclk) {
+ return mem_freq;
+ }
+
++static uint8_t rc_word_chip_select_lower_bit(void) {
++ if (is_fam15h()) {
++ return 21;
++ } else {
++ return 20;
++ }
++}
++
++static uint32_t rc_word_address_to_ctl_bits(uint32_t address) {
++ if (is_fam15h()) {
++ return (((address >> 3) & 0x1) << 2) << 18 | (address & 0x7);
++ } else {
++ return (((address >> 3) & 0x1) << 2) << 16 | (address & 0x7);
++ }
++}
++
+ static uint32_t rc_word_value_to_ctl_bits(uint32_t value) {
+- return ((value >> 2) & 3) << 16 | ((value & 3) << 3);
++ if (is_fam15h()) {
++ return ((value >> 2) & 0x3) << 18 | ((value & 0x3) << 3);
++ } else {
++ return ((value >> 2) & 0x3) << 16 | ((value & 0x3) << 3);
++ }
+ }
+
+ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u32 MrsChipSel, u32 CtrlWordNum)
++ struct DCTStatStruc *pDCTstat, uint8_t dct, u32 MrsChipSel, u32 CtrlWordNum)
+ {
+ u8 Dimms, DimmNum;
+ u32 val;
+- u32 dct = 0;
+ uint8_t ddr_voltage_index;
+ uint16_t mem_freq;
+ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
+ uint8_t MaxDimmsInstallable = mctGet_NVbits(NV_MAX_DIMMS_PER_CH);
+
+- DimmNum = (MrsChipSel >> 20) & 0xFE;
++ DimmNum = (MrsChipSel >> rc_word_chip_select_lower_bit()) & 0xfe;
+
+- /* assume dct=0; */
+- /* if (dct == 1) */
+- /* DimmNum ++; */
+- /* cl +=8; */
++ if (dct == 1)
++ DimmNum++;
+
+ mem_freq = memclk_to_freq(pDCTstat->DIMMAutoSpeed);
+-
+- if (pDCTstat->CSPresent_DCT[0] > 0) {
+- dct = 0;
+- } else if (pDCTstat->CSPresent_DCT[1] > 0 ) {
+- dct = 1;
+- DimmNum++;
+- }
+ Dimms = pDCTstat->MAdimms[dct];
+
+ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct);
+@@ -76,21 +158,25 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat,
+ val = 0x2;
+ else if (CtrlWordNum == 1) {
+ if (!((pDCTstat->DimmDRPresent | pDCTstat->DimmQRPresent) & (1 << DimmNum)))
+- val = 0xC; /* if single rank, set DBA1 and DBA0 */
++ val = 0xc; /* if single rank, set DBA1 and DBA0 */
+ } else if (CtrlWordNum == 2) {
+- if (package_type == PT_GR) {
+- /* Socket G34 */
+- if (MaxDimmsInstallable == 2) {
+- if (Dimms > 1)
+- val = 0x4;
++ if (is_fam15h()) {
++ val = fam15h_rdimm_rc2_control_code(pDCTstat, dct);
++ } else {
++ if (package_type == PT_GR) {
++ /* Socket G34 */
++ if (MaxDimmsInstallable == 2) {
++ if (Dimms > 1)
++ val = 0x4;
++ }
+ }
+ }
+ } else if (CtrlWordNum == 3) {
+- val = (pDCTstat->CtrlWrd3 >> (DimmNum << 2)) & 0xFF;
++ val = (pDCTstat->CtrlWrd3 >> (DimmNum << 2)) & 0xff;
+ } else if (CtrlWordNum == 4) {
+- val = (pDCTstat->CtrlWrd4 >> (DimmNum << 2)) & 0xFF;
++ val = (pDCTstat->CtrlWrd4 >> (DimmNum << 2)) & 0xff;
+ } else if (CtrlWordNum == 5) {
+- val = (pDCTstat->CtrlWrd5 >> (DimmNum << 2)) & 0xFF;
++ val = (pDCTstat->CtrlWrd5 >> (DimmNum << 2)) & 0xff;
+ } else if (CtrlWordNum == 8) {
+ if (package_type == PT_GR) {
+ /* Socket G34 */
+@@ -99,7 +185,7 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat,
+ }
+ }
+ } else if (CtrlWordNum == 9) {
+- val = 0xD; /* DBA1, DBA0, DA3 = 0 */
++ val = 0xd; /* DBA1, DBA0, DA3 = 0 */
+ } else if (CtrlWordNum == 10) {
+ val = 0x0; /* Lowest operating frequency */
+ } else if (CtrlWordNum == 11) {
+@@ -114,43 +200,30 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat,
+ }
+ val &= 0xf;
+
+- printk(BIOS_SPEW, "Preparing to send DIMM RC%d: %02x\n", CtrlWordNum, val);
++ printk(BIOS_SPEW, "Preparing to send DCT %d DIMM RC%d: %02x\n", dct, CtrlWordNum, val);
+
+ val = MrsChipSel | rc_word_value_to_ctl_bits(val);
+-
+- /* transfer Control word number to address [BA2,A2,A1,A0] */
+- if (CtrlWordNum > 7) {
+- val |= 1 << 18;
+- CtrlWordNum &= 7;
+- }
+- val |= CtrlWordNum;
++ val |= rc_word_address_to_ctl_bits(CtrlWordNum);
+
+ return val;
+ }
+
+ static void mct_SendCtrlWrd(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u32 val)
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t val)
+ {
+- uint8_t dct = 0;
+ u32 dev = pDCTstat->dev_dct;
+
+- if (pDCTstat->CSPresent_DCT[0] > 0) {
+- dct = 0;
+- } else if (pDCTstat->CSPresent_DCT[1] > 0 ){
+- dct = 1;
+- }
+-
+- val |= Get_NB32_DCT(dev, dct, 0x7C) & ~0xFFFFFF;
++ val |= Get_NB32_DCT(dev, dct, 0x7c) & ~0xffffff;
+ val |= 1 << SendControlWord;
+- Set_NB32_DCT(dev, dct, 0x7C, val);
++ Set_NB32_DCT(dev, dct, 0x7c, val);
+
+ do {
+- val = Get_NB32_DCT(dev, dct, 0x7C);
++ val = Get_NB32_DCT(dev, dct, 0x7c);
+ } while (val & (1 << SendControlWord));
+ }
+
+ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 dct)
++ struct DCTStatStruc *pDCTstat, uint8_t dct)
+ {
+ u8 MrsChipSel;
+ u32 dev = pDCTstat->dev_dct;
+@@ -163,7 +236,7 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat,
+ for (MrsChipSel = 0; MrsChipSel < 8; MrsChipSel ++, MrsChipSel ++) {
+ if (pDCTstat->CSPresent & (1 << MrsChipSel)) {
+ val = Get_NB32_DCT(dev, dct, 0xa8);
+- val &= ~(0xF << 8);
++ val &= ~(0xf << 8);
+
+ switch (MrsChipSel) {
+ case 0:
+@@ -184,8 +257,8 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat,
+ for (cw=0; cw <=15; cw ++) {
+ mct_Wait(1600);
+ if (!(cw==6 || cw==7)) {
+- val = mct_ControlRC(pMCTstat, pDCTstat, MrsChipSel << 20, cw);
+- mct_SendCtrlWrd(pMCTstat, pDCTstat, val);
++ val = mct_ControlRC(pMCTstat, pDCTstat, dct, MrsChipSel << rc_word_chip_select_lower_bit(), cw);
++ mct_SendCtrlWrd(pMCTstat, pDCTstat, dct, val);
+ }
+ }
+ }
+@@ -195,7 +268,7 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat,
+ }
+
+ void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat)
++ struct DCTStatStruc *pDCTstat, uint8_t dct)
+ {
+ u32 SaveSpeed = pDCTstat->DIMMAutoSpeed;
+ u32 MrsChipSel;
+@@ -208,10 +281,10 @@ void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat,
+ for (MrsChipSel=0; MrsChipSel < 8; MrsChipSel++, MrsChipSel++) {
+ if (pDCTstat->CSPresent & (1 << MrsChipSel)) {
+ /* 2. Program F2x[1, 0]A8[CtrlWordCS]=bit mask for target chip selects. */
+- val = Get_NB32_DCT(dev, 0, 0xA8); /* TODO: dct 0 / 1 select */
+- val &= ~(0xFF << 8);
+- val |= (0x3 << (MrsChipSel & 0xFE)) << 8;
+- Set_NB32_DCT(dev, 0, 0xA8, val); /* TODO: dct 0 / 1 select */
++ val = Get_NB32_DCT(dev, dct, 0xa8);
++ val &= ~(0xff << 8);
++ val |= (0x3 << (MrsChipSel & 0xfe)) << 8;
++ Set_NB32_DCT(dev, dct, 0xa8, val);
+
+ /* Resend control word 10 */
+ uint8_t freq_ctl_val = 0;
+@@ -235,21 +308,21 @@ void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat,
+ break;
+ }
+
+- printk(BIOS_SPEW, "Preparing to send DIMM RC%d: %02x\n", 10, freq_ctl_val);
++ printk(BIOS_SPEW, "Preparing to send DCT %d DIMM RC%d: %02x\n", dct, 10, freq_ctl_val);
+
+- mct_SendCtrlWrd(pMCTstat, pDCTstat, MrsChipSel << 20 | 0x40002 | rc_word_value_to_ctl_bits(freq_ctl_val));
++ mct_SendCtrlWrd(pMCTstat, pDCTstat, dct, MrsChipSel << rc_word_chip_select_lower_bit() | rc_word_address_to_ctl_bits(10) | rc_word_value_to_ctl_bits(freq_ctl_val));
+
+ mct_Wait(1600);
+
+ /* Resend control word 2 */
+- val = mct_ControlRC(pMCTstat, pDCTstat, MrsChipSel << 20, 2);
+- mct_SendCtrlWrd(pMCTstat, pDCTstat, val);
++ val = mct_ControlRC(pMCTstat, pDCTstat, dct, MrsChipSel << rc_word_chip_select_lower_bit(), 2);
++ mct_SendCtrlWrd(pMCTstat, pDCTstat, dct, val);
+
+ mct_Wait(1600);
+
+ /* Resend control word 8 */
+- val = mct_ControlRC(pMCTstat, pDCTstat, MrsChipSel << 20, 8);
+- mct_SendCtrlWrd(pMCTstat, pDCTstat, val);
++ val = mct_ControlRC(pMCTstat, pDCTstat, dct, MrsChipSel << rc_word_chip_select_lower_bit(), 8);
++ mct_SendCtrlWrd(pMCTstat, pDCTstat, dct, val);
+
+ mct_Wait(1600);
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+index 9ccf77e..09a5f68 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+@@ -445,13 +445,13 @@ static u32 mct_MR2(struct MCTStatStruc *pMCTstat,
+ u32 dev = pDCTstat->dev_dct;
+ u32 dword, ret;
+
++ /* The formula for chip select number is: CS = dimm*2+rank */
++ uint8_t dimm = MrsChipSel / 2;
++ uint8_t rank = MrsChipSel % 2;
++
+ if (is_fam15h()) {
+ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
+
+- /* The formula for chip select number is: CS = dimm*2+rank */
+- uint8_t dimm = MrsChipSel / 2;
+- uint8_t rank = MrsChipSel % 2;
+-
+ /* FIXME: These parameters should be configurable
+ * For now, err on the side of caution and enable automatic 2x refresh
+ * when the DDR temperature rises above the internal limits
+@@ -496,7 +496,7 @@ static u32 mct_MR2(struct MCTStatStruc *pMCTstat,
+ ret |= ((dword >> 10) & 3) << 9;
+ }
+
+- printk(BIOS_SPEW, "Going to send MR2 control word %08x\n", ret);
++ printk(BIOS_SPEW, "Going to send DCT %d DIMM %d rank %d MR2 control word %08x\n", dct, dimm, rank, ret);
+
+ return ret;
+ }
+@@ -507,6 +507,10 @@ static u32 mct_MR3(struct MCTStatStruc *pMCTstat,
+ u32 dev = pDCTstat->dev_dct;
+ u32 dword, ret;
+
++ /* The formula for chip select number is: CS = dimm*2+rank */
++ uint8_t dimm = MrsChipSel / 2;
++ uint8_t rank = MrsChipSel % 2;
++
+ if (is_fam15h()) {
+ ret = 0xc0000;
+ ret |= (MrsChipSel << 21);
+@@ -527,7 +531,7 @@ static u32 mct_MR3(struct MCTStatStruc *pMCTstat,
+ ret |= (dword >> 24) & 7;
+ }
+
+- printk(BIOS_SPEW, "Going to send MR3 control word %08x\n", ret);
++ printk(BIOS_SPEW, "Going to send DCT %d DIMM %d rank %d MR3 control word %08x\n", dct, dimm, rank, ret);
+
+ return ret;
+ }
+@@ -538,6 +542,10 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat,
+ u32 dev = pDCTstat->dev_dct;
+ u32 dword, ret;
+
++ /* The formula for chip select number is: CS = dimm*2+rank */
++ uint8_t dimm = MrsChipSel / 2;
++ uint8_t rank = MrsChipSel % 2;
++
+ if (is_fam15h()) {
+ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
+
+@@ -553,10 +561,6 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat,
+ ret = 0x40000;
+ ret |= (MrsChipSel << 21);
+
+- /* The formula for chip select number is: CS = dimm*2+rank */
+- uint8_t dimm = MrsChipSel / 2;
+- uint8_t rank = MrsChipSel % 2;
+-
+ /* Determine if TQDS should be set */
+ if ((pDCTstat->Dimmx8Present & (1 << dimm))
+ && (((dimm & 0x1)?(pDCTstat->Dimmx4Present&0x55):(pDCTstat->Dimmx4Present&0xaa)) != 0x0)
+@@ -623,7 +627,7 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat,
+ ret |= 1 << 12;
+ }
+
+- printk(BIOS_SPEW, "Going to send MR1 control word %08x\n", ret);
++ printk(BIOS_SPEW, "Going to send DCT %d DIMM %d rank %d MR1 control word %08x\n", dct, dimm, rank, ret);
+
+ return ret;
+ }
+@@ -634,6 +638,10 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat,
+ u32 dev = pDCTstat->dev_dct;
+ u32 dword, ret, dword2;
+
++ /* The formula for chip select number is: CS = dimm*2+rank */
++ uint8_t dimm = MrsChipSel / 2;
++ uint8_t rank = MrsChipSel % 2;
++
+ if (is_fam15h()) {
+ ret = 0x00000;
+ ret |= (MrsChipSel << 21);
+@@ -744,7 +752,7 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat,
+ ret |= 1 << 8;
+ }
+
+- printk(BIOS_SPEW, "Going to send MR0 control word %08x\n", ret);
++ printk(BIOS_SPEW, "Going to send DCT %d DIMM %d rank %d MR0 control word %08x\n", dct, dimm, rank, ret);
+
+ return ret;
+ }
+@@ -811,6 +819,16 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat,
+ /* 8.wait 360ns */
+ mct_Wait(80);
+
++ /* Set up address parity */
++ if ((pDCTstat->Status & (1 << SB_Registered))
++ || (pDCTstat->Status & (1 << SB_LoadReduced))) {
++ if (is_fam15h()) {
++ dword = Get_NB32_DCT(dev, dct, 0x90);
++ dword |= 1 << ParEn;
++ Set_NB32_DCT(dev, dct, 0x90, dword);
++ }
++ }
++
+ /* The following steps are performed with registered DIMMs only and
+ * must be done for each chip select pair */
+ if (pDCTstat->Status & (1 << SB_Registered))
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+index 981f467..707e6a9 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+@@ -1146,8 +1146,10 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
+ uint8_t dimm;
+ uint8_t rank;
+ uint8_t lane;
++ uint8_t nibble;
+ uint8_t mem_clk;
+ uint16_t initial_seed;
++ uint8_t train_both_nibbles;
+ uint16_t current_total_delay[MAX_BYTE_LANES];
+ uint16_t dqs_ret_pass1_total_delay[MAX_BYTE_LANES];
+ uint16_t rank0_current_total_delay[MAX_BYTE_LANES];
+@@ -1163,6 +1165,11 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
+ print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
+ print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
+
++ train_both_nibbles = 0;
++ if (pDCTstat->Dimmx4Present)
++ if (is_fam15h())
++ train_both_nibbles = 1;
++
+ dev = pDCTstat->dev_dct;
+ index_reg = 0x98;
+ ch_start = 0;
+@@ -1245,132 +1252,148 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
+ else
+ _2Ranks = 0;
+ for (rank = 0; rank < (_2Ranks + 1); rank++) {
+- /* 2.10.5.8.2 (1)
+- * Specify the target DIMM to be trained
+- * Set TrNibbleSel = 0
+- *
+- * TODO: Add support for x4 DIMMs
+- */
+- dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
+- dword &= ~(0x3 << 4); /* TrDimmSel */
+- dword |= ((dimm & 0x3) << 4);
+- dword &= ~(0x1 << 2); /* TrNibbleSel */
+- Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
+-
+- /* 2.10.5.8.2 (2)
+- * Retrieve gross and fine timing fields from write DQS registers
+- */
+- read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
++ for (nibble = 0; nibble < (train_both_nibbles + 1); nibble++) {
++ /* 2.10.5.8.2 (1)
++ * Specify the target DIMM and nibble to be trained
++ */
++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
++ dword &= ~(0x3 << 4); /* TrDimmSel = dimm */
++ dword |= ((dimm & 0x3) << 4);
++ dword &= ~(0x1 << 2); /* TrNibbleSel = nibble */
++ dword |= ((nibble & 0x1) << 2);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
++
++ /* 2.10.5.8.2 (2)
++ * Retrieve gross and fine timing fields from write DQS registers
++ */
++ read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+
+- /* 2.10.5.8.2.1
+- * Generate the DQS Receiver Enable Training Seed Values
+- */
+- if (Pass == FirstPass) {
+- initial_seed = fam15_receiver_enable_training_seed(pDCTstat, Channel, dimm, rank, package_type);
++ /* 2.10.5.8.2.1
++ * Generate the DQS Receiver Enable Training Seed Values
++ */
++ if (Pass == FirstPass) {
++ initial_seed = fam15_receiver_enable_training_seed(pDCTstat, Channel, dimm, rank, package_type);
+
+- /* Adjust seed for the minimum platform supported frequency */
+- initial_seed = (uint16_t) (((((uint64_t) initial_seed) *
+- fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
++ /* Adjust seed for the minimum platform supported frequency */
++ initial_seed = (uint16_t) (((((uint64_t) initial_seed) *
++ fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
+
+- for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+- uint16_t wl_pass1_delay;
+- wl_pass1_delay = current_total_delay[lane];
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint16_t wl_pass1_delay;
++ wl_pass1_delay = current_total_delay[lane];
+
+- seed[lane] = initial_seed + wl_pass1_delay;
+- }
+- } else {
+- uint8_t addr_prelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
+- uint16_t register_delay;
+- int16_t seed_prescaling;
+-
+- memcpy(current_total_delay, dqs_ret_pass1_total_delay, sizeof(current_total_delay));
+- if ((pDCTstat->Status & (1 << SB_Registered))) {
+- if (addr_prelaunch)
+- register_delay = 0x30;
+- else
+- register_delay = 0x20;
+- } else if ((pDCTstat->Status & (1 << SB_LoadReduced))) {
+- /* TODO
+- * Load reduced DIMM support unimplemented
+- */
+- register_delay = 0x0;
++ seed[lane] = initial_seed + wl_pass1_delay;
++ }
+ } else {
+- register_delay = 0x0;
++ uint8_t addr_prelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
++ uint16_t register_delay;
++ int16_t seed_prescaling;
++
++ memcpy(current_total_delay, dqs_ret_pass1_total_delay, sizeof(current_total_delay));
++ if ((pDCTstat->Status & (1 << SB_Registered))) {
++ if (addr_prelaunch)
++ register_delay = 0x30;
++ else
++ register_delay = 0x20;
++ } else if ((pDCTstat->Status & (1 << SB_LoadReduced))) {
++ /* TODO
++ * Load reduced DIMM support unimplemented
++ */
++ register_delay = 0x0;
++ } else {
++ register_delay = 0x0;
++ }
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ seed_prescaling = current_total_delay[lane] - register_delay - 0x20;
++ seed[lane] = (uint16_t) (register_delay + ((((uint64_t) seed_prescaling) * fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
++ }
+ }
+
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+- seed_prescaling = current_total_delay[lane] - register_delay - 0x20;
+- seed[lane] = (uint16_t) (register_delay + ((((uint64_t) seed_prescaling) * fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
+- }
+- }
++ seed_gross[lane] = (seed[lane] >> 5) & 0x1f;
++ seed_fine[lane] = seed[lane] & 0x1f;
+
+- for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+- seed_gross[lane] = (seed[lane] >> 5) & 0x1f;
+- seed_fine[lane] = seed[lane] & 0x1f;
++ /*if (seed_gross[lane] == 0)
++ seed_pre_gross[lane] = 0;
++ else */if (seed_gross[lane] & 0x1)
++ seed_pre_gross[lane] = 1;
++ else
++ seed_pre_gross[lane] = 2;
+
+- /*if (seed_gross[lane] == 0)
+- seed_pre_gross[lane] = 0;
+- else */if (seed_gross[lane] & 0x1)
+- seed_pre_gross[lane] = 1;
+- else
+- seed_pre_gross[lane] = 2;
++ /* Calculate phase recovery delays */
++ phase_recovery_delays[lane] = ((seed_pre_gross[lane] & 0x1f) << 5) | (seed_fine[lane] & 0x1f);
+
+- /* Calculate phase recovery delays */
+- phase_recovery_delays[lane] = ((seed_pre_gross[lane] & 0x1f) << 5) | (seed_fine[lane] & 0x1f);
++ /* Set the gross delay.
++ * NOTE: While the BKDG states to only program DqsRcvEnGrossDelay, this appears
++ * to have been a misprint as DqsRcvEnFineDelay should be set to zero as well.
++ */
++ current_total_delay[lane] = ((seed_gross[lane] & 0x1f) << 5);
++ }
+
+- /* Set the gross delay.
+- * NOTE: While the BKDG states to only program DqsRcvEnGrossDelay, this appears
+- * to have been a misprint as DqsRcvEnFineDelay should be set to zero as well.
++ /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (5 6)
++ * Program PhRecFineDly and PhRecGrossDly
+ */
+- current_total_delay[lane] = ((seed_gross[lane] & 0x1f) << 5);
+- }
++ write_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);
+
+- /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (5 6)
+- * Program PhRecFineDly and PhRecGrossDly
+- */
+- write_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);
++ /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (7)
++ * Program the DQS Receiver Enable delay values for each lane
++ */
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+
+- /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (7)
+- * Program the DQS Receiver Enable delay values for each lane
+- */
+- write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
++ /* 2.10.5.8.2 (3)
++ * Program DqsRcvTrEn = 1
++ */
++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
++ dword |= (0x1 << 13);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
+
+- /* 2.10.5.8.2 (3)
+- * Program DqsRcvTrEn = 1
+- */
+- dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
+- dword |= (0x1 << 13);
+- Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
++ /* 2.10.5.8.2 (4)
++ * Issue 192 read requests to the target rank
++ */
++ generate_dram_receiver_enable_training_pattern_fam15(pMCTstat, pDCTstat, Channel, Receiver + (rank & 0x1));
+
+- /* 2.10.5.8.2 (4)
+- * Issue 192 read requests to the target rank
+- */
+- generate_dram_receiver_enable_training_pattern_fam15(pMCTstat, pDCTstat, Channel, Receiver + (rank & 0x1));
++ /* 2.10.5.8.2 (5)
++ * Program DqsRcvTrEn = 0
++ */
++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
++ dword &= ~(0x1 << 13);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
+
+- /* 2.10.5.8.2 (5)
+- * Program DqsRcvTrEn = 0
+- */
+- dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
+- dword &= ~(0x1 << 13);
+- Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
++ /* 2.10.5.8.2 (6)
++ * Read PhRecGrossDly, PhRecFineDly
++ */
++ read_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);
+
+- /* 2.10.5.8.2 (6)
+- * Read PhRecGrossDly, PhRecFineDly
+- */
+- read_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);
++ /* 2.10.5.8.2 (7)
++ * Calculate and program the DQS Receiver Enable delay values
++ */
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ current_total_delay[lane] = (phase_recovery_delays[lane] & 0x1f);
++ current_total_delay[lane] |= ((seed_gross[lane] + ((phase_recovery_delays[lane] >> 5) & 0x1f) - seed_pre_gross[lane] + 1) << 5);
++ if (nibble == 0) {
++ if (lane == 8)
++ pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane];
++ else
++ pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane];
++ } else {
++ /* 2.10.5.8.2 (1)
++ * Average the trained values of both nibbles on x4 DIMMs
++ */
++ if (lane == 8)
++ pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = (pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] + current_total_delay[lane]) / 2;
++ else
++ pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = (pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] + current_total_delay[lane]) / 2;
++ }
++ }
+
+- /* 2.10.5.8.2 (7)
+- * Calculate and program the DQS Receiver Enable delay values
+- */
+- for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+- current_total_delay[lane] = (phase_recovery_delays[lane] & 0x1f);
+- current_total_delay[lane] |= ((seed_gross[lane] + ((phase_recovery_delays[lane] >> 5) & 0x1f) - seed_pre_gross[lane] + 1) << 5);
+- if (lane == 8)
+- pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane];
+- else
+- pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane];
++#if DQS_TRAIN_DEBUG > 1
++ for (lane = 0; lane < 8; lane++)
++ printk(BIOS_DEBUG, "\t\tTrainRcvEn55: Channel: %d dimm: %d nibble: %d lane %d current_total_delay: %04x CH_D_B_RCVRDLY: %04x\n",
++ Channel, dimm, nibble, lane, current_total_delay[lane], pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane]);
++#endif
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+ }
+- write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+
+ if (rank == 0) {
+ /* Back up the Rank 0 delays for later use */
+@@ -1395,7 +1418,7 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
+
+ #if DQS_TRAIN_DEBUG > 0
+ for (lane = 0; lane < 8; lane++)
+- print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
++ print_debug_dqs_pair("\t\tTrainRcvEn56: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
+ #endif
+ }
+ }
+@@ -1815,15 +1838,23 @@ void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ }
+
+ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstatA)
++ struct DCTStatStruc *pDCTstatA, int16_t single_node_number)
+ {
+ u8 Node = 0;
+ struct DCTStatStruc *pDCTstat;
+
+ printk(BIOS_DEBUG, "%s: Start\n", __func__);
+
++ uint8_t start_node = 0;
++ uint8_t end_node = MAX_NODES_SUPPORTED;
++
++ if (single_node_number >= 0) {
++ start_node = single_node_number;
++ end_node = single_node_number + 1; /* exclusive bound: loop runs while Node < end_node */
++ }
++
+ /* FIXME: skip for Ax */
+- for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
++ for (Node = start_node; Node < end_node; Node++) {
+ pDCTstat = pDCTstatA + Node;
+ if (!pDCTstat->NodePresent)
+ continue;
+@@ -1847,6 +1878,8 @@ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
+ if (!pDCTstat->DIMMValidDCT[dct])
+ continue;
+
++ printk(BIOS_SPEW, "%s: training node %d DCT %d\n", __func__, Node, dct);
++
+ /* Back up D18F2x9C_x0000_0004_dct[1:0] */
+ datc_backup = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004);
+
+@@ -1985,6 +2018,8 @@ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
+
+ /* Restore D18F2x9C_x0000_0004_dct[1:0] */
+ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004, datc_backup);
++
++ printk(BIOS_SPEW, "%s: done training node %d DCT %d\n", __func__, Node, dct);
+ }
+ } else {
+ fenceDynTraining_D(pMCTstat, pDCTstat, 0);
+@@ -1997,7 +2032,7 @@ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
+ }
+
+ static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 dct)
++ struct DCTStatStruc *pDCTstat, uint8_t dct)
+ {
+ u16 avRecValue;
+ u32 val;
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
+index 6b63ba0..3153e46 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
+@@ -19,7 +19,7 @@
+ */
+
+ static void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat);
++ struct DCTStatStruc *pDCTstat, uint8_t dct);
+
+
+ static void AgesaDelay(u32 msec)
+@@ -353,11 +353,14 @@ static void ExitSelfRefresh(struct MCTStatStruc *pMCTstat,
+ }
+
+ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat)
++ struct DCTStatStruc *pDCTstatA, uint8_t Node)
+ {
+ uint32_t dword;
+ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
+
++ struct DCTStatStruc *pDCTstat;
++ pDCTstat = pDCTstatA + Node;
++
+ if (is_fam15h()) {
+ /* Program F2x[1, 0]90[DisDllShutDownSR]=1. */
+ if (pDCTstat->DIMMValidDCT[0]) {
+@@ -391,7 +394,7 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
+ uint8_t dct;
+ for (dct = 0; dct < 2; dct++) {
+ if (pDCTstat->DIMMValidDCT[dct]) {
+- phyAssistedMemFnceTraining(pMCTstat, pDCTstat);
++ phyAssistedMemFnceTraining(pMCTstat, pDCTstatA, Node);
+ InitPhyCompensation(pMCTstat, pDCTstat, dct);
+ }
+ }
+@@ -438,7 +441,12 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
+ else
+ pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[1];
+
+- FreqChgCtrlWrd(pMCTstat, pDCTstat);
++ if (pDCTstat->DIMMValidDCT[0]) {
++ FreqChgCtrlWrd(pMCTstat, pDCTstat, 0);
++ }
++ if (pDCTstat->DIMMValidDCT[1]) {
++ FreqChgCtrlWrd(pMCTstat, pDCTstat, 1);
++ }
+ }
+ }
+
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+index e5e4031..73b231e 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+@@ -35,9 +35,9 @@ u32 swapBankBits(struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t MRSValue);
+ void prepareDimms(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
+ u8 dct, u8 dimm, BOOL wl);
+ void programODT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm);
+-void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm, u8 pass);
++void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t pass, uint8_t nibble);
+ void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, u8 targetAddr, uint8_t pass);
+-void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass);
++void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass, uint8_t nibble);
+
+ static int32_t abs(int32_t val) {
+ if (val < 0)
+@@ -76,6 +76,8 @@ uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT
+ {
+ u8 ByteLane;
+ u32 Value, Addr;
++ uint8_t nibble = 0;
++ uint8_t train_both_nibbles;
+ u16 Addl_Data_Offset, Addl_Data_Port;
+ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr;
+ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
+@@ -88,98 +90,108 @@ uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT
+ DRAM_ADD_DCT_PHY_CONTROL_REG, TrDimmSelStart,
+ TrDimmSelEnd, (u32)dimm);
+
+- if (is_fam15h()) {
+- /* Set TrNibbleSel = 0
+- *
+- * TODO: Add support for x4 DIMMs
+- */
+- set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+- DRAM_ADD_DCT_PHY_CONTROL_REG, 2,
+- 2, (u32)0);
+- }
++ train_both_nibbles = 0;
++ if (pDCTstat->Dimmx4Present)
++ if (is_fam15h())
++ train_both_nibbles = 1;
+
+- /* 2. Prepare the DIMMs for write levelization using DDR3-defined
+- * MR commands. */
+- prepareDimms(pMCTstat, pDCTstat, dct, dimm, TRUE);
++ for (nibble = 0; nibble < (train_both_nibbles + 1); nibble++) {
++ printk(BIOS_SPEW, "AgesaHwWlPhase1: training nibble %d\n", nibble);
+
+- /* 3. After the DIMMs are configured, BIOS waits 40 MEMCLKs to
+- * satisfy DDR3-defined internal DRAM timing.
+- */
+- if (is_fam15h())
+- precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 40);
+- else
+- pMCTData->AgesaDelay(40);
++ if (is_fam15h()) {
++ /* Program F2x[1, 0]9C_x08[WrtLvTrEn]=0 */
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 0);
++
++ /* Set TrNibbleSel */
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_ADD_DCT_PHY_CONTROL_REG, 2,
++ 2, (uint32_t)nibble);
++ }
+
+- /* 4. Configure the processor's DDR phy for write levelization training: */
+- procConfig(pMCTstat, pDCTstat, dct, dimm, pass);
++ /* 2. Prepare the DIMMs for write levelization using DDR3-defined
++ * MR commands. */
++ prepareDimms(pMCTstat, pDCTstat, dct, dimm, TRUE);
+
+- /* 5. Begin write levelization training:
+- * Program F2x[1, 0]9C_x08[WrtLvTrEn]=1. */
+- if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx | AMD_FAM15_ALL))
+- {
+- set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+- DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 1);
+- }
+- else
+- {
+- /* Broadcast write to all D3Dbyte chipset register offset 0xc
+- * Set bit 0 (wrTrain)
+- * Program bit 4 to nibble being trained (only matters for x4dimms)
+- * retain value of 3:2 (Trdimmsel)
+- * reset bit 5 (FrzPR)
++ /* 3. After the DIMMs are configured, BIOS waits 40 MEMCLKs to
++ * satisfy DDR3-defined internal DRAM timing.
+ */
+- if (dct)
++ if (is_fam15h())
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 40);
++ else
++ pMCTData->AgesaDelay(40);
++
++ /* 4. Configure the processor's DDR phy for write levelization training: */
++ procConfig(pMCTstat, pDCTstat, dct, dimm, pass, nibble);
++
++ /* 5. Begin write levelization training:
++ * Program F2x[1, 0]9C_x08[WrtLvTrEn]=1. */
++ if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx | AMD_FAM15_ALL))
+ {
+- Addl_Data_Offset=0x198;
+- Addl_Data_Port=0x19C;
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 1);
+ }
+ else
+ {
+- Addl_Data_Offset=0x98;
+- Addl_Data_Port=0x9C;
++ /* Broadcast write to all D3Dbyte chipset register offset 0xc
++ * Set bit 0 (wrTrain)
++ * Program bit 4 to nibble being trained (only matters for x4dimms)
++ * retain value of 3:2 (Trdimmsel)
++ * reset bit 5 (FrzPR)
++ */
++ if (dct)
++ {
++ Addl_Data_Offset=0x198;
++ Addl_Data_Port=0x19C;
++ }
++ else
++ {
++ Addl_Data_Offset=0x98;
++ Addl_Data_Port=0x9C;
++ }
++ Addr=0x0D00000C;
++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Offset), 31, 0, &Addr);
++ while ((get_Bits(pDCTData,FUN_DCT,pDCTData->NodeId, FUN_DCT, Addl_Data_Offset,
++ DctAccessDone, DctAccessDone)) == 0);
++ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Port), 31, 0, &Value);
++ Value = bitTestSet(Value, 0); /* enable WL training */
++ Value = bitTestReset(Value, 4); /* for x8 only */
++ Value = bitTestReset(Value, 5); /* for hardware WL training */
++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Port), 31, 0, &Value);
++ Addr=0x4D030F0C;
++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Offset), 31, 0, &Addr);
++ while ((get_Bits(pDCTData,FUN_DCT,pDCTData->NodeId, FUN_DCT, Addl_Data_Offset,
++ DctAccessDone, DctAccessDone)) == 0);
+ }
+- Addr=0x0D00000C;
+- AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Offset), 31, 0, &Addr);
+- while ((get_Bits(pDCTData,FUN_DCT,pDCTData->NodeId, FUN_DCT, Addl_Data_Offset,
+- DctAccessDone, DctAccessDone)) == 0);
+- AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Port), 31, 0, &Value);
+- Value = bitTestSet(Value, 0); /* enable WL training */
+- Value = bitTestReset(Value, 4); /* for x8 only */
+- Value = bitTestReset(Value, 5); /* for hardware WL training */
+- AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Port), 31, 0, &Value);
+- Addr=0x4D030F0C;
+- AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Offset), 31, 0, &Addr);
+- while ((get_Bits(pDCTData,FUN_DCT,pDCTData->NodeId, FUN_DCT, Addl_Data_Offset,
+- DctAccessDone, DctAccessDone)) == 0);
+- }
+
+- if (is_fam15h())
+- proc_MFENCE();
++ if (is_fam15h())
++ proc_MFENCE();
+
+- /* Wait 200 MEMCLKs. If executing pass 2, wait 32 MEMCLKs. */
+- if (is_fam15h())
+- precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 200);
+- else
+- pMCTData->AgesaDelay(140);
++ /* Wait 200 MEMCLKs. If executing pass 2, wait 32 MEMCLKs. */
++ if (is_fam15h())
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 200);
++ else
++ pMCTData->AgesaDelay(140);
+
+- /* Program F2x[1, 0]9C_x08[WrtLevelTrEn]=0. */
+- set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+- DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 0);
++ /* Program F2x[1, 0]9C_x08[WrtLevelTrEn]=0. */
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 0);
+
+- /* Read from registers F2x[1, 0]9C_x[51:50] and F2x[1, 0]9C_x52
+- * to get the gross and fine delay settings
+- * for the target DIMM and save these values. */
+- for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
+- getWLByteDelay(pDCTstat, dct, ByteLane, dimm, pass);
+- }
++ /* Read from registers F2x[1, 0]9C_x[51:50] and F2x[1, 0]9C_x52
++ * to get the gross and fine delay settings
++ * for the target DIMM and save these values. */
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ getWLByteDelay(pDCTstat, dct, ByteLane, dimm, pass, nibble);
++ }
+
+- pDCTData->WLCriticalGrossDelayPrevPass = 0x1f;
++ pDCTData->WLCriticalGrossDelayPrevPass = 0x0;
++ }
+
+ return 0;
+ }
+
+ uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
+- u8 dct, u8 dimm, u8 pass)
++ uint8_t dct, uint8_t dimm, uint8_t pass)
+ {
+ u8 ByteLane;
+ uint8_t status = 0;
+@@ -190,6 +202,12 @@ uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT
+ int32_t cgd = pDCTData->WLCriticalGrossDelayPrevPass;
+ uint8_t index = (uint8_t)(MAX_BYTE_LANES * dimm);
+
++ printk(BIOS_SPEW, "\toriginal critical gross delay: %d\n", cgd);
++
++ /* FIXME
++ * For now, disable CGD adjustment as it seems to interfere with registered DIMM training
++ */
++
+ /* Calculate the Critical Gross Delay */
+ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
+ /* Calculate the gross delay differential for this lane */
+@@ -205,6 +223,8 @@ uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT
+ cgd = gross_diff[ByteLane];
+ }
+
++ printk(BIOS_SPEW, "\tnew critical gross delay: %d\n", cgd);
++
+ pDCTData->WLCriticalGrossDelayPrevPass = cgd;
+
+ if (pDCTstat->Speed != pDCTstat->TargetFreq) {
+@@ -281,7 +301,7 @@ uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT
+ gross_diff[ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane] + pDCTData->WLGrossDelay[index+ByteLane];
+ gross_diff[ByteLane] -= pDCTData->WLSeedPreGrossDelay[index+ByteLane];
+
+- /* Prevent underflow in the presence of noise / instability*/
++ /* Prevent underflow in the presence of noise / instability */
+ if (gross_diff[ByteLane] < cgd)
+ gross_diff[ByteLane] = cgd;
+
+@@ -289,7 +309,8 @@ uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT
+ }
+ } else {
+ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8);
+- dword &= ~(0x3 << 24); /* WrDqDqsEarly = 0 */
++ dword &= ~(0x3 << 24); /* WrDqDqsEarly = pDCTData->WrDqsGrossDlyBaseOffset */
++ dword |= ((pDCTData->WrDqsGrossDlyBaseOffset & 0x3) << 24);
+ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8, dword);
+ }
+ }
+@@ -959,7 +980,7 @@ static uint16_t fam15h_next_lowest_memclk_freq(uint16_t memclk_freq)
+ #endif
+
+ /*-----------------------------------------------------------------------------
+- * void procConfig(MCTStruct *MCTData,DCTStruct *DCTData, u8 Dimm, u8 Pass)
++ * void procConfig(MCTStruct *MCTData,DCTStruct *DCTData, u8 Dimm, u8 Pass, u8 Nibble)
+ *
+ * Description:
+ * This function programs the ODT values for the NB
+@@ -972,13 +993,14 @@ static uint16_t fam15h_next_lowest_memclk_freq(uint16_t memclk_freq)
+ * OUT
+ * ----------------------------------------------------------------------------
+ */
+-void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm, u8 pass)
++void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t pass, uint8_t nibble)
+ {
+ u8 ByteLane, MemClkFreq;
+ int32_t Seed_Gross;
+ int32_t Seed_Fine;
+ uint8_t Seed_PreGross;
+ u32 Value, Addr;
++ uint32_t dword;
+ u16 Addl_Data_Offset, Addl_Data_Port;
+ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr;
+ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
+@@ -1048,10 +1070,17 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
+ uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
+ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
+ uint16_t Seed_Total = 0;
++ pDCTData->WrDqsGrossDlyBaseOffset = 0x0;
+ if (package_type == PT_GR) {
+ /* Socket G34: Fam15h BKDG v3.14 Table 96 */
+ if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
++ /* TODO
++ * Implement mainboard-specific seed and
++ * WrDqsGrossDly base overrides.
++ * 0x41 and 0x0 are the "stock" values
++ */
+ Seed_Total = 0x41;
++ pDCTData->WrDqsGrossDlyBaseOffset = 0x2;
+ } else if (pDCTData->Status[DCT_STATUS_LOAD_REDUCED]) {
+ Seed_Total = 0x0;
+ } else {
+@@ -1133,15 +1162,16 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
+ printk(BIOS_SPEW, "\tLane %02x initial seed: %04x\n", ByteLane, ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f));
+ }
+ } else {
+- /* Pass 2 */
+- /* From BKDG, Write Leveling Seed Value. */
+- if (is_fam15h()) {
+- uint32_t RegisterDelay;
+- int32_t SeedTotal;
+- int32_t SeedTotalPreScaling;
+- uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
++ if (nibble == 0) {
++ /* Pass 2 */
++ /* From BKDG, Write Leveling Seed Value. */
++ if (is_fam15h()) {
++ uint32_t RegisterDelay;
++ int32_t SeedTotal[MAX_BYTE_LANES];
++ int32_t SeedTotalPreScaling[MAX_BYTE_LANES];
++ uint32_t WrDqDqsEarly;
++ uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
+
+- for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
+ if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+ if (AddrCmdPrelaunch)
+ RegisterDelay = 0x30;
+@@ -1150,84 +1180,133 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
+ } else {
+ RegisterDelay = 0;
+ }
++
+ /* Retrieve WrDqDqsEarly */
+- AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId), FUN_DCT, 0xa8), 25, 24, &Value);
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8);
++ WrDqDqsEarly = (dword >> 24) & 0x3;
+
+- /* Calculate adjusted seed values */
+- SeedTotal = (pDCTData->WLFineDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
+- ((pDCTData->WLGrossDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5);
+- SeedTotalPreScaling = (SeedTotal - RegisterDelay - (0x20 * Value));
+- SeedTotal = (int32_t) (RegisterDelay + ((((int64_t) SeedTotalPreScaling) *
+- fam15h_freq_tab[MemClkFreq] * 100) / (fam15h_freq_tab[pDCTData->WLPrevMemclkFreq] * 100)));
++ /* FIXME
++ * Ignore WrDqDqsEarly for now to work around training issues
++ */
++ WrDqDqsEarly = 0;
+
+- if (SeedTotal >= 0) {
+- Seed_Gross = SeedTotal / 32;
+- Seed_Fine = SeedTotal % 32;
+- } else {
+- Seed_Gross = (SeedTotal / 32) - 1;
+- Seed_Fine = (SeedTotal % 32) + 32;
++ /* Generate new seed values */
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ /* Calculate adjusted seed values */
++ SeedTotal[ByteLane] = (pDCTData->WLFineDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
++ ((pDCTData->WLGrossDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5);
++ SeedTotalPreScaling[ByteLane] = (SeedTotal[ByteLane] - RegisterDelay - (0x20 * WrDqDqsEarly));
++ SeedTotal[ByteLane] = (int32_t) (RegisterDelay + ((((int64_t) SeedTotalPreScaling[ByteLane]) *
++ fam15h_freq_tab[MemClkFreq] * 100) / (fam15h_freq_tab[pDCTData->WLPrevMemclkFreq] * 100)));
+ }
+
+- if (Seed_Gross == 0)
+- Seed_PreGross = 0;
+- else if (Seed_Gross & 0x1)
+- Seed_PreGross = 1;
+- else
+- Seed_PreGross = 2;
++ /* Generate register values from seeds */
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ printk(BIOS_SPEW, "\tLane %02x scaled delay: %04x\n", ByteLane, SeedTotal[ByteLane]);
+
+- /* Save seed values for later use */
+- pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
+- pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
+- pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross;
++ if (SeedTotal[ByteLane] >= 0) {
++ Seed_Gross = SeedTotal[ByteLane] / 32;
++ Seed_Fine = SeedTotal[ByteLane] % 32;
++ } else {
++ Seed_Gross = (SeedTotal[ByteLane] / 32) - 1;
++ Seed_Fine = (SeedTotal[ByteLane] % 32) + 32;
++ }
+
+- pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross;
+- pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++ if (Seed_Gross == 0)
++ Seed_PreGross = 0;
++ else if (Seed_Gross & 0x1)
++ Seed_PreGross = 1;
++ else
++ Seed_PreGross = 2;
+
+- printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f));
+- }
+- } else {
+- uint32_t RegisterDelay;
+- uint32_t SeedTotalPreScaling;
+- uint32_t SeedTotal;
+- uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
+- for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
+- {
+- if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+- if (AddrCmdPrelaunch == 0)
+- RegisterDelay = 0x20;
++ /* The BKDG-recommended algorithm causes problems with registered DIMMs on some systems
++ * due to the long register delays causing premature total delay wrap-around.
++ * Attempt to work around this...
++ */
++ Seed_PreGross = Seed_Gross;
++
++ /* Save seed values for later use */
++ pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
++ pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++ pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross;
++
++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross;
++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++
++ printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f));
++ }
++ } else {
++ uint32_t RegisterDelay;
++ uint32_t SeedTotalPreScaling;
++ uint32_t SeedTotal;
++ uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
++ {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
++ if (AddrCmdPrelaunch == 0)
++ RegisterDelay = 0x20;
++ else
++ RegisterDelay = 0x30;
++ } else {
++ RegisterDelay = 0;
++ }
++ SeedTotalPreScaling = ((pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
++ (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5)) - RegisterDelay;
++ /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization
++ training) - RegisterDelay. */
++ SeedTotal = (uint16_t) ((((uint64_t) SeedTotalPreScaling) *
++ fam10h_freq_tab[MemClkFreq] * 100) / (fam10h_freq_tab[3] * 100));
++ Seed_Gross = SeedTotal / 32;
++ Seed_Fine = SeedTotal & 0x1f;
++ if (Seed_Gross == 0)
++ Seed_Gross = 0;
++ else if (Seed_Gross & 0x1)
++ Seed_Gross = 1;
+ else
+- RegisterDelay = 0x30;
+- } else {
+- RegisterDelay = 0;
++ Seed_Gross = 2;
++
++ /* The BKDG-recommended algorithm causes problems with registered DIMMs on some systems
++ * due to the long register delays causing premature total delay wrap-around.
++ * Attempt to work around this...
++ */
++ SeedTotal = ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f);
++ SeedTotal += RegisterDelay;
++ Seed_Gross = SeedTotal / 32;
++ Seed_Fine = SeedTotal & 0x1f;
++
++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++
++ printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f));
+ }
+- SeedTotalPreScaling = ((pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
+- (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5)) - RegisterDelay;
+- /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization
+- training) - RegisterDelay. */
+- SeedTotal = (uint16_t) ((((uint64_t) SeedTotalPreScaling) *
+- fam10h_freq_tab[MemClkFreq] * 100) / (fam10h_freq_tab[3] * 100));
+- Seed_Gross = SeedTotal / 32;
+- Seed_Fine = SeedTotal & 0x1f;
+- if (Seed_Gross == 0)
+- Seed_Gross = 0;
+- else if (Seed_Gross & 0x1)
+- Seed_Gross = 1;
+- else
+- Seed_Gross = 2;
++ }
+
+- /* The BKDG-recommended algorithm causes problems with registered DIMMs on some systems
+- * due to the long register delays causing premature total delay wrap-around.
+- * Attempt to work around this...
+- */
+- SeedTotal = ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f);
+- SeedTotal += RegisterDelay;
+- Seed_Gross = SeedTotal / 32;
+- Seed_Fine = SeedTotal & 0x1f;
++ /* Save initial seeds for upper nibble pass */
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ pDCTData->WLSeedPreGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane];
++ pDCTData->WLSeedGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane];
++ pDCTData->WLSeedFinePrevNibble[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane];
++ }
++ } else {
++ /* Restore seed values from lower nibble pass */
++ if (is_fam15h()) {
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane];
++ pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedFinePrevNibble[MAX_BYTE_LANES*dimm+ByteLane];
++ pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedPreGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane];
+
+- pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
+- pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedPreGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane];
++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedFinePrevNibble[MAX_BYTE_LANES*dimm+ByteLane];
+
+- printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f));
++ printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f));
++ }
++ } else {
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane];
++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedFinePrevNibble[MAX_BYTE_LANES*dimm+ByteLane];
++
++ printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f));
++ }
+ }
+ }
+ }
+@@ -1358,7 +1437,7 @@ void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8
+ }
+
+ /*-----------------------------------------------------------------------------
+- * void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 Dimm)
++ * void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 Dimm, u8 Pass, u8 Nibble)
+ *
+ * Description:
+ * This function reads the write levelization byte delay from the Phase
+@@ -1376,7 +1455,7 @@ void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8
+ *
+ *-----------------------------------------------------------------------------
+ */
+-void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass)
++void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass, uint8_t nibble)
+ {
+ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
+ u8 fineStartLoc, fineEndLoc, grossStartLoc, grossEndLoc, tempB, tempB1, index;
+@@ -1427,7 +1506,16 @@ void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8
+ fine = 0;
+ }
+ }
+- pDCTData->WLFineDelay[index+ByteLane] = (u8)fine;
+- pDCTData->WLGrossDelay[index+ByteLane] = (u8)gross;
+- printk(BIOS_SPEW, "\tLane %02x final adjusted value: %04x\n", ByteLane, ((gross & 0x1f) << 5) | (fine & 0x1f));
++ if (nibble == 0) {
++ pDCTData->WLFineDelay[index+ByteLane] = (uint8_t)fine;
++ pDCTData->WLGrossDelay[index+ByteLane] = (uint8_t)gross;
++ } else {
++ uint32_t WLTotalDelay = ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f);
++ WLTotalDelay += ((gross & 0x1f) << 5) | (fine & 0x1f);
++ WLTotalDelay /= 2;
++ pDCTData->WLFineDelay[index+ByteLane] = (uint8_t)(WLTotalDelay & 0x1f);
++ pDCTData->WLGrossDelay[index+ByteLane] = (uint8_t)((WLTotalDelay >> 5) & 0x1f);
++ }
++
++ printk(BIOS_SPEW, "\tLane %02x adjusted value: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f));
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h
+index 12e7c4a..3337c14 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h
+@@ -119,16 +119,21 @@ typedef struct _sDCTStruct
+ u8 DctTrain; /* Current DCT being trained */
+ u8 CurrDct; /* Current DCT number (0 or 1) */
+ u8 DctCSPresent; /* Current DCT CS mapping */
++ uint8_t WrDqsGrossDlyBaseOffset;
+ int32_t WLSeedGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Gross Delay */
+ /* per byte Lane Per Logical DIMM*/
+ int32_t WLSeedFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Fine Delay */
+ /* per byte Lane Per Logical DIMM*/
+ int32_t WLSeedPreGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Pre-Gross Delay */
+ /* per byte Lane Per Logical DIMM*/
+- u8 WLGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Gross Delay */
+- /* per byte Lane Per Logical DIMM*/
+- u8 WLFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Fine Delay */
+- /* per byte Lane Per Logical DIMM*/
++ uint8_t WLSeedPreGrossPrevNibble[MAX_BYTE_LANES*MAX_LDIMMS];
++ uint8_t WLSeedGrossPrevNibble[MAX_BYTE_LANES*MAX_LDIMMS];
++ uint8_t WLSeedFinePrevNibble[MAX_BYTE_LANES*MAX_LDIMMS];
++ /* per byte Lane Per Logical DIMM*/
++ u8 WLGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Gross Delay */
++ /* per byte Lane Per Logical DIMM*/
++ u8 WLFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Fine Delay */
++ /* per byte Lane Per Logical DIMM*/
+ u8 WLGrossDelayFirstPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* First-Pass Write Levelization Gross Delay */
+ /* per byte Lane Per Logical DIMM*/
+ u8 WLFineDelayFirstPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* First-Pass Write Levelization Fine Delay */
+--
+1.7.9.5
+