author    Francis Rowe <info@gluglug.org.uk>    2016-01-02 17:10:32 (EST)
committer Francis Rowe <info@gluglug.org.uk>    2016-01-04 15:28:39 (EST)
commit    d1f408f3725aa02bc1d76c4c6aadb4697bd073c0 (patch)
tree      7eed036543ae1f8c57b56825880a722a8efbedf1 /resources/libreboot/patch/coreboot/33fb4cf0ffb01be8bcb6b488872c87eb50e7d77f/grub/kgpe-d16/0034-cpu-amd-Add-initial-AMD-Family-15h-support.patch
parent    91aec7e72005dcda72d19f2d024a02d8c0f86590 (diff)
Use different coreboot revisions and patches per board
The release archives will be bigger, but this is a necessary change that makes libreboot development easier. Boards in libreboot are maintained by different people, and giving each board its own coreboot revision and patch set makes that maintenance much easier. This is in contrast to the previous situation, where a change made for one board could potentially affect all other boards, especially when updating to a new version of coreboot. The entire build system has been modified to reflect this change: coreboot-libre scripts, download scripts and build scripts alike. For reasons of consistency, cbfstool and nvramtool are no longer included in the util archives.
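The per-board layout this introduces is visible in the path of the file below: one patch series per coreboot revision, payload and board. As a rough sketch only (the revision hash, payload "grub" and board "kgpe-d16" are taken from this commit, while the loop and the "coreboot" checkout directory name are hypothetical and not libreboot's actual build script):

    # Hypothetical sketch: apply one board's own patch series to its own coreboot tree.
    rev=33fb4cf0ffb01be8bcb6b488872c87eb50e7d77f
    for patch in resources/libreboot/patch/coreboot/"$rev"/grub/kgpe-d16/*.patch; do
        # absolute path so "git -C" still finds the patch after changing directory
        git -C coreboot am "$PWD/$patch"
    done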
Diffstat (limited to 'resources/libreboot/patch/coreboot/33fb4cf0ffb01be8bcb6b488872c87eb50e7d77f/grub/kgpe-d16/0034-cpu-amd-Add-initial-AMD-Family-15h-support.patch')
-rw-r--r--  resources/libreboot/patch/coreboot/33fb4cf0ffb01be8bcb6b488872c87eb50e7d77f/grub/kgpe-d16/0034-cpu-amd-Add-initial-AMD-Family-15h-support.patch  16249
1 file changed, 16249 insertions(+), 0 deletions(-)
diff --git a/resources/libreboot/patch/coreboot/33fb4cf0ffb01be8bcb6b488872c87eb50e7d77f/grub/kgpe-d16/0034-cpu-amd-Add-initial-AMD-Family-15h-support.patch b/resources/libreboot/patch/coreboot/33fb4cf0ffb01be8bcb6b488872c87eb50e7d77f/grub/kgpe-d16/0034-cpu-amd-Add-initial-AMD-Family-15h-support.patch
new file mode 100644
index 0000000..fa979fe
--- /dev/null
+++ b/resources/libreboot/patch/coreboot/33fb4cf0ffb01be8bcb6b488872c87eb50e7d77f/grub/kgpe-d16/0034-cpu-amd-Add-initial-AMD-Family-15h-support.patch
@@ -0,0 +1,16249 @@
+From 429c96728e6a22e1d53f801c8bd4075a91fe422b Mon Sep 17 00:00:00 2001
+From: Timothy Pearson <tpearson@raptorengineeringinc.com>
+Date: Fri, 16 Oct 2015 13:51:51 -0500
+Subject: [PATCH 034/143] cpu/amd: Add initial AMD Family 15h support
+
+TEST: Booted ASUS KGPE-D16 with single Opteron 6380
+ * Unbuffered DDR3 DIMMs tested and working
+ * Suspend to RAM (S3) tested and working
+
+Conflicts:
+
+ src/cpu/amd/car/disable_cache_as_ram.c
+
+Change-Id: Idffd2ce36ce183fbfa087e5ba69a9148f084b45e
+Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com>
+---
+ src/cpu/amd/car/cache_as_ram.inc | 130 +-
+ src/cpu/amd/car/disable_cache_as_ram.c | 79 +-
+ src/cpu/amd/family_10h-family_15h/defaults.h | 266 +-
+ src/cpu/amd/family_10h-family_15h/fidvid.c | 237 +-
+ src/cpu/amd/family_10h-family_15h/init_cpus.c | 232 +-
+ .../amd/family_10h-family_15h/model_10xxx_init.c | 92 +-
+ src/cpu/amd/family_10h-family_15h/powernow_acpi.c | 50 +-
+ src/cpu/amd/family_10h-family_15h/processor_name.c | 194 +-
+ .../amd/family_10h-family_15h/update_microcode.c | 6 +
+ src/cpu/amd/model_fxx/init_cpus.c | 2 +-
+ src/cpu/amd/quadcore/quadcore.c | 109 +-
+ src/cpu/amd/quadcore/quadcore_id.c | 43 +-
+ src/include/cpu/amd/model_10xxx_msr.h | 7 +
+ src/mainboard/advansus/a785e-i/romstage.c | 2 +-
+ src/mainboard/amd/bimini_fam10/romstage.c | 2 +-
+ src/mainboard/amd/mahogany_fam10/romstage.c | 2 +-
+ .../amd/serengeti_cheetah_fam10/romstage.c | 2 +-
+ src/mainboard/amd/tilapia_fam10/romstage.c | 2 +-
+ src/mainboard/asus/kfsn4-dre/romstage.c | 2 +-
+ src/mainboard/asus/kgpe-d16/romstage.c | 4 +-
+ src/mainboard/asus/m4a78-em/romstage.c | 2 +-
+ src/mainboard/asus/m4a785-m/romstage.c | 2 +-
+ src/mainboard/asus/m5a88-v/romstage.c | 2 +-
+ src/mainboard/avalue/eax-785e/romstage.c | 2 +-
+ src/mainboard/gigabyte/ma785gm/romstage.c | 2 +-
+ src/mainboard/gigabyte/ma785gmt/romstage.c | 2 +-
+ src/mainboard/gigabyte/ma78gm/romstage.c | 2 +-
+ src/mainboard/hp/dl165_g6_fam10/romstage.c | 2 +-
+ src/mainboard/iei/kino-780am2-fam10/romstage.c | 2 +-
+ src/mainboard/jetway/pa78vm5/romstage.c | 2 +-
+ src/mainboard/msi/ms9652_fam10/romstage.c | 2 +-
+ src/mainboard/supermicro/h8dmr_fam10/romstage.c | 2 +-
+ src/mainboard/supermicro/h8qme_fam10/romstage.c | 2 +-
+ src/mainboard/supermicro/h8scm_fam10/romstage.c | 2 +-
+ src/mainboard/tyan/s2912_fam10/romstage.c | 2 +-
+ src/northbridge/amd/amdfam10/Kconfig | 2 +-
+ src/northbridge/amd/amdfam10/Makefile.inc | 2 +
+ src/northbridge/amd/amdfam10/amdfam10.h | 6 +-
+ src/northbridge/amd/amdfam10/amdfam10_util.c | 13 +-
+ src/northbridge/amd/amdfam10/link_control.c | 86 +
+ src/northbridge/amd/amdfam10/misc_control.c | 7 +
+ src/northbridge/amd/amdfam10/nb_control.c | 85 +
+ src/northbridge/amd/amdfam10/northbridge.c | 233 +-
+ src/northbridge/amd/amdfam10/raminit_amdmct.c | 304 +-
+ src/northbridge/amd/amdht/h3ncmn.c | 171 +-
+ src/northbridge/amd/amdht/ht_wrapper.c | 43 +-
+ src/northbridge/amd/amdmct/amddefs.h | 78 +-
+ src/northbridge/amd/amdmct/mct/mct_d.c | 4 +-
+ src/northbridge/amd/amdmct/mct/mct_d.h | 20 +-
+ src/northbridge/amd/amdmct/mct/mctpro_d.c | 21 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mct_d.c | 3187 ++++++++++++++++----
+ src/northbridge/amd/amdmct/mct_ddr3/mct_d.h | 124 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h | 9 +
+ src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c | 21 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c | 27 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c | 1087 ++++++-
+ src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c | 55 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c | 7 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c | 105 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctproc.c | 2 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctrci.c | 24 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c | 585 +++-
+ src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 1342 ++++++++-
+ src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c | 10 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c | 20 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctwl.c | 255 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c | 1007 +++++--
+ src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c | 69 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h | 46 +-
+ src/northbridge/amd/amdmct/mct_ddr3/s3utils.c | 652 +++-
+ src/northbridge/amd/amdmct/wrappers/mcti.h | 14 +-
+ src/northbridge/amd/amdmct/wrappers/mcti_d.c | 43 +-
+ 72 files changed, 9192 insertions(+), 2067 deletions(-)
+ create mode 100644 src/northbridge/amd/amdfam10/link_control.c
+ create mode 100644 src/northbridge/amd/amdfam10/nb_control.c
+
+diff --git a/src/cpu/amd/car/cache_as_ram.inc b/src/cpu/amd/car/cache_as_ram.inc
+index 0b2bc60..6542906 100644
+--- a/src/cpu/amd/car/cache_as_ram.inc
++++ b/src/cpu/amd/car/cache_as_ram.inc
+@@ -32,18 +32,23 @@
+ #define CacheSizeAPStack CONFIG_DCACHE_AP_STACK_SIZE
+
+ #define MSR_MCFG_BASE 0xC0010058
+-#define MSR_FAM10 0xC001102A
++#define MSR_BU_CFG2 0xC001102A
+
+ #define jmp_if_k8(x) comisd %xmm2, %xmm1; jb x
++#define jmp_if_not_fam15h(x) comisd %xmm3, %xmm1; jb x
++#define jmp_if_fam15h(x) comisd %xmm3, %xmm1; jae x
+
+ #define CPUID_MASK 0x0ff00f00
+ #define CPUID_VAL_FAM10_ROTATED 0x0f000010
++#define CPUID_VAL_FAM15_ROTATED 0x0f000060
+
+ /*
+ * XMM map:
+ * xmm1: CPU family
+ * xmm2: Fam10h comparison value
+- * xmm3: Backup EBX
++ * xmm3: Fam15h comparison value
++ * xmm4: Backup EBX
++ * xmm5: Coreboot init detect
+ */
+
+ /* Save the BIST result. */
+@@ -63,7 +68,7 @@ cache_as_ram_setup:
+ movl %eax, %cr4
+
+ /* Figure out the CPU family. */
+- cvtsi2sd %ebx, %xmm3
++ cvtsi2sd %ebx, %xmm4
+ movl $0x01, %eax
+ cpuid
+ /* Base family is bits 8..11, extended family is bits 20..27. */
+@@ -73,13 +78,16 @@ cache_as_ram_setup:
+ cvtsi2sd %eax, %xmm1
+ movl $CPUID_VAL_FAM10_ROTATED, %eax
+ cvtsi2sd %eax, %xmm2
+- cvtsd2si %xmm3, %ebx
++ movl $CPUID_VAL_FAM15_ROTATED, %eax
++ cvtsi2sd %eax, %xmm3
++ cvtsd2si %xmm4, %ebx
+
+ /* Check if cpu_init_detected. */
+ movl $MTRR_DEF_TYPE_MSR, %ecx
+ rdmsr
+ andl $MTRR_DEF_TYPE_EN, %eax
+ movl %eax, %ebx /* We store the status. */
++ cvtsi2sd %ebx, %xmm5
+
+ jmp_if_k8(CAR_FAM10_out_post_errata)
+
+@@ -120,21 +128,24 @@ cache_as_ram_setup:
+
+ CAR_FAM10_out:
+
++ jmp_if_fam15h(CAR_FAM10_errata_applied)
+ /*
+ * Errata 193: Disable clean copybacks to L3 cache to allow cached ROM.
+ * Re-enable it in after RAM is initialized and before CAR is disabled.
+ */
+- movl $MSR_FAM10, %ecx
++ movl $MSR_BU_CFG2, %ecx
+ rdmsr
+- bts $15, %eax
++ bts $15, %eax /* Set bit 15 in EDX:EAX (bit 15 in EAX). */
+ wrmsr
+
+ /* Erratum 343, RevGuide for Fam10h, Pub#41322 Rev. 3.33 */
+- movl $MSR_FAM10, %ecx
++ movl $MSR_BU_CFG2, %ecx
+ rdmsr
+ bts $35-32, %edx /* Set bit 35 in EDX:EAX (bit 3 in EDX). */
+ wrmsr
+
++CAR_FAM10_errata_applied:
++
+ #if CONFIG_MMCONF_SUPPORT
+ #if (CONFIG_MMCONF_BASE_ADDRESS > 0xFFFFFFFF)
+ #error "MMCONF_BASE_ADDRESS too big"
+@@ -169,6 +180,63 @@ CAR_FAM10_out:
+
+ CAR_FAM10_out_post_errata:
+
++ /* Fam15h APIC IDs do not depend on NB config bit 54 */
++ jmp_if_not_fam15h(skip_nb54_set)
++ movl $0xc001001f, %ecx /* NB_CFG_MSR */
++ rdmsr
++ bts $(54 - 32), %edx /* Set NB config bit 54 */
++ wrmsr
++
++skip_nb54_set:
++ /* On Fam15h CPUs each compute unit's MTRRs are shared between two cores */
++ jmp_if_not_fam15h(skip_cu_check)
++
++ /* Get the initial APIC ID. */
++ movl $1, %eax
++ cpuid
++ movl %ebx, %eax
++
++ /* Restore init detect */
++ cvtsd2si %xmm5, %ebx
++
++ /* Determine if this is the second core to start in a compute unit; if so, wait for first core start, clear init detect and skip MTRR init */
++ bt $24, %eax
++ jnc skip_cu_check /* First core in the compute unit jumps to skip_cu_check */
++
++ /* Determine if this is the second core to start in a compute unit; if so, clear init detect and skip MTRR init */
++ /* Busywait until the first core sets up the MTRRs */
++check_init_detect_1:
++ /* Check if cpu_init_detected. */
++ movl $MTRR_DEF_TYPE_MSR, %ecx
++ rdmsr
++ andl $MTRR_DEF_TYPE_EN, %eax
++ cmp $0x00000000, %eax
++ je check_init_detect_1 /* First core has not yet started */
++
++check_init_detect_2:
++ movl $SYSCFG_MSR, %ecx
++ rdmsr
++ andl $(SYSCFG_MSR_MtrrFixDramEn | SYSCFG_MSR_MtrrVarDramEn), %eax
++ cmp $0x00000000, %eax
++ je check_init_detect_2 /* First core has not yet started */
++
++ /* First core has now started */
++ movl $0x00000000, %ebx /* Clear init detect flag */
++ cvtsi2sd %ebx, %xmm5
++ jmp fam10_mtrr_setup_complete
++
++skip_cu_check:
++
++ jmp_if_not_fam15h(CAR_FAM15_errata_applied)
++
++ /* Erratum 714, RevGuide for Fam15h, Pub#48063 Rev. 3.24 */
++ movl $MSR_BU_CFG2, %ecx
++ rdmsr
++ bts $8, %eax /* Set bit 8 in EDX:EAX (bit 8 in EAX). */
++ wrmsr
++
++CAR_FAM15_errata_applied:
++
+ /* Set MtrrFixDramModEn for clear fixed MTRR. */
+ enable_fixed_mtrr_dram_modify:
+ movl $SYSCFG_MSR, %ecx
+@@ -337,8 +405,42 @@ wbcache_post_fam10_setup:
+ orl $(SYSCFG_MSR_MtrrVarDramEn | SYSCFG_MSR_MtrrFixDramEn), %eax
+ wrmsr
+
++fam10_mtrr_setup_complete:
+ post_code(0xa1)
+
++ /* Disable conversion of INVD to WBINVD (INVDWBINVD = 0) */
++ mov $0xc0010015, %ecx
++ rdmsr
++ btr $4, %eax
++ wrmsr
++
++jmp_if_not_fam15h(fam15_car_msr_setup_complete)
++ /* Disable streaming store (DisSS = 1) */
++ mov $0xc0011020, %ecx
++ rdmsr
++ bts $28, %eax
++ wrmsr
++
++ /* Disable speculative ITLB reloads (DisSpecTlbRld = 1) */
++ mov $0xc0011021, %ecx
++ rdmsr
++ bts $9, %eax
++ wrmsr
++
++ /* Disable speculative DTLB reloads (DisSpecTlbRld = 1) and set DisHwPf = 1 */
++ mov $0xc0011022, %ecx
++ rdmsr
++ bts $4, %eax
++ bts $13, %eax
++ wrmsr
++
++ /* Disable CR0 combining (CombineCr0Cd = 0) */
++ mov $0xc001102b, %ecx
++ rdmsr
++ btr $49-32, %edx
++ wrmsr
++fam15_car_msr_setup_complete:
++
+ /* Enable cache. */
+ movl %cr0, %eax
+ andl $(~(CR0_CacheDisable | CR0_NoWriteThrough)), %eax
+@@ -393,9 +495,6 @@ CAR_FAM10_ap:
+ * to reverse it.
+ */
+
+- /* Store our init detected. */
+- movl %ebx, %esi
+-
+ /* Get the coreid bits at first. */
+ movl $0x80000008, %eax
+ cpuid
+@@ -414,6 +513,8 @@ CAR_FAM10_ap:
+ movl %edi, %ecx /* CoreID bits */
+ bt $(54 - 32), %edx
+ jc roll_cfg
++
++ /* Fam10h NB config bit 54 was not set */
+ rolb %cl, %bl
+ roll_cfg:
+
+@@ -423,8 +524,8 @@ roll_cfg:
+ movl $(CacheBase + (CacheSize - (CacheSizeBSPStack + CacheSizeBSPSlush))), %esp
+ subl %eax, %esp
+
+- /* Retrive init detected. */
+- movl %esi, %ebx
++ /* Restore init detect */
++ cvtsd2si %xmm5, %ebx
+
+ post_code(0xa4)
+
+@@ -437,6 +538,8 @@ CAR_FAM10_ap_out:
+ andl $~(3 << 9), %eax
+ movl %eax, %cr4
+
++ post_code(0xa6)
++
+ /* Restore the BIST result. */
+ movl %ebp, %eax
+
+@@ -444,6 +547,9 @@ CAR_FAM10_ap_out:
+ movl %esp, %ebp
+ pushl %ebx /* Init detected. */
+ pushl %eax /* BIST */
++
++ post_code(0xa7)
++
+ call cache_as_ram_main
+
+ /* We will not go back. */
+diff --git a/src/cpu/amd/car/disable_cache_as_ram.c b/src/cpu/amd/car/disable_cache_as_ram.c
+index 5eccf79..86180ee 100644
+--- a/src/cpu/amd/car/disable_cache_as_ram.c
++++ b/src/cpu/amd/car/disable_cache_as_ram.c
+@@ -19,7 +19,7 @@
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc.
+ *
+- * be warned, this file will be used other cores and core 0 / node 0
++ * WARNING: this file will be used by both any AP cores and core 0 / node 0
+ */
+
+ #include <cpu/x86/cache.h>
+@@ -34,41 +34,80 @@ static inline __attribute__((always_inline)) uint32_t amd_fam1x_cpu_family(void)
+ return family;
+ }
+
+-static inline __attribute__((always_inline)) void disable_cache_as_ram(void)
++static inline __attribute__((always_inline)) void disable_cache_as_ram(uint8_t skip_sharedc_config)
+ {
+ msr_t msr;
++ uint32_t family;
+
+- /* disable cache */
+- write_cr0(read_cr0() | CR0_CacheDisable);
++ if (!skip_sharedc_config) {
++ /* disable cache */
++ write_cr0(read_cr0() | CR0_CacheDisable);
+
+- msr.lo = 0;
+- msr.hi = 0;
+- wrmsr(MTRR_FIX_4K_C8000, msr);
++ msr.lo = 0;
++ msr.hi = 0;
++ wrmsr(MTRR_FIX_4K_C8000, msr);
+ #if CONFIG_DCACHE_RAM_SIZE > 0x8000
+- wrmsr(MTRR_FIX_4K_C0000, msr);
++ wrmsr(MTRR_FIX_4K_C0000, msr);
+ #endif
+ #if CONFIG_DCACHE_RAM_SIZE > 0x10000
+- wrmsr(MTRR_FIX_4K_D0000, msr);
++ wrmsr(MTRR_FIX_4K_D0000, msr);
+ #endif
+ #if CONFIG_DCACHE_RAM_SIZE > 0x18000
+- wrmsr(MTRR_FIX_4K_D8000, msr);
++ wrmsr(MTRR_FIX_4K_D8000, msr);
+ #endif
+- /* disable fixed mtrr from now on, it will be enabled by ramstage again*/
++ /* disable fixed mtrr from now on, it will be enabled by ramstage again */
++ msr = rdmsr(SYSCFG_MSR);
++ msr.lo &= ~(SYSCFG_MSR_MtrrFixDramEn | SYSCFG_MSR_MtrrFixDramModEn);
++ wrmsr(SYSCFG_MSR, msr);
++
++ /* Set the default memory type and disable fixed and enable variable MTRRs */
++ msr.hi = 0;
++ msr.lo = (1 << 11);
++
++ wrmsr(MTRR_DEF_TYPE_MSR, msr);
++
++ enable_cache();
++ }
+
+- msr = rdmsr(SYSCFG_MSR);
+- msr.lo &= ~(SYSCFG_MSR_MtrrFixDramEn | SYSCFG_MSR_MtrrFixDramModEn);
+- wrmsr(SYSCFG_MSR, msr);
++ /* INVDWBINVD = 1 */
++ msr = rdmsr(0xc0010015);
++ msr.lo |= (0x1 << 4);
++ wrmsr(0xc0010015, msr);
+
+- /* Set the default memory type and disable fixed and enable variable MTRRs */
+- msr.hi = 0;
+- msr.lo = (1 << 11);
++ family = amd_fam1x_cpu_family();
+
+- wrmsr(MTRR_DEF_TYPE_MSR, msr);
++#if IS_ENABLED(CPU_AMD_MODEL_10XXX)
++ if (family >= 0x6f) {
++ /* Family 15h or later */
+
+- enable_cache();
++ /* DisSS = 0 */
++ msr = rdmsr(0xc0011020);
++ msr.lo &= ~(0x1 << 28);
++ wrmsr(0xc0011020, msr);
++
++ if (!skip_sharedc_config) {
++ /* DisSpecTlbRld = 0 */
++ msr = rdmsr(0xc0011021);
++ msr.lo &= ~(0x1 << 9);
++ wrmsr(0xc0011021, msr);
++
++ /* Erratum 714: SpecNbReqDis = 0 */
++ msr = rdmsr(BU_CFG2_MSR);
++ msr.lo &= ~(0x1 << 8);
++ wrmsr(BU_CFG2_MSR, msr);
++ }
++
++ /* DisSpecTlbRld = 0 */
++ /* DisHwPf = 0 */
++ msr = rdmsr(0xc0011022);
++ msr.lo &= ~(0x1 << 4);
++ msr.lo &= ~(0x1 << 13);
++ wrmsr(0xc0011022, msr);
++ }
++#endif
+ }
+
+ static void disable_cache_as_ram_bsp(void)
+ {
+- disable_cache_as_ram();
++ disable_cache_as_ram(0);
+ }
+diff --git a/src/cpu/amd/family_10h-family_15h/defaults.h b/src/cpu/amd/family_10h-family_15h/defaults.h
+index 6fd1a7e..24f87ba 100644
+--- a/src/cpu/amd/family_10h-family_15h/defaults.h
++++ b/src/cpu/amd/family_10h-family_15h/defaults.h
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -25,41 +26,65 @@
+ */
+ static const struct {
+ u32 msr;
+- u32 revision;
++ uint64_t revision;
+ u32 platform;
+ u32 data_lo;
+ u32 data_hi;
+ u32 mask_lo;
+ u32 mask_hi;
+ } fam10_msr_default[] = {
+- { TOP_MEM2, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { TOP_MEM2, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+
+- { SYSCFG, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { SYSCFG, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 3 << 21, 0x00000000,
+ 3 << 21, 0x00000000 }, /* [MtrrTom2En]=1,[TOM2EnWB] = 1*/
+
+- { HWCR, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+- 1 << 4, 0x00000000,
+- 1 << 4, 0x00000000 }, /* [INVD_WBINVD]=1 */
++ { MC1_CTL_MASK, AMD_OR_B2, AMD_PTYPE_ALL,
++ 1 << 18, 0x00000000,
++ 1 << 18, 0x00000000 }, /* Erratum 586: [DEIBP]=1 */
+
+- { MC4_CTL_MASK, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { MC1_CTL_MASK, AMD_OR_B2, AMD_PTYPE_ALL,
++ 1 << 15, 0x00000000,
++ 1 << 15, 0x00000000 }, /* Erratum 593: [BSRP]=1 */
++
++ { MC1_CTL_MASK, AMD_OR_C0, AMD_PTYPE_ALL,
++ 1 << 15, 0x00000000,
++ 1 << 15, 0x00000000 }, /* Erratum 739: [BSRP]=1 */
++
++ { 0xc0011000, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 1 << 16, 0x00000000,
++ 1 << 16, 0x00000000 }, /* Erratum 608: [bit 16]=1 */
++
++ { 0xc0011000, AMD_OR_C0, AMD_PTYPE_ALL,
++ 1 << 15, 0x00000000,
++ 1 << 15, 0x00000000 }, /* Erratum 727: [bit 15]=1 */
++
++ { MC4_CTL_MASK, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0xF << 19, 0x00000000,
+ 0xF << 19, 0x00000000 }, /* [RtryHt[0..3]]=1 */
+
++ { MC4_CTL_MASK, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
++ 1 << 10, 0x00000000,
++ 1 << 10, 0x00000000 }, /* [GartTblWkEn]=1 */
++
+ { DC_CFG, AMD_FAM10_ALL, AMD_PTYPE_SVR,
+ 0x00000000, 0x00000004,
+- 0x00000000, 0x0000000C }, /* [REQ_CTR] = 1 for Server */
++ 0x00000000, 0x0000000C }, /* Family 10h: [REQ_CTR] = 1 for Server */
+
+ { DC_CFG, AMD_DR_Bx, AMD_PTYPE_SVR,
+ 0x00000000, 0x00000000,
+ 0x00000000, 0x00000C00 }, /* Erratum 326 */
+
+- { NB_CFG, AMD_FAM10_ALL, AMD_PTYPE_DC | AMD_PTYPE_MC,
++ { NB_CFG, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_DC | AMD_PTYPE_MC,
+ 0x00000000, 1 << 22,
+ 0x00000000, 1 << 22 }, /* [ApicInitIDLo]=1 */
+
++ { NB_CFG, AMD_FAM15_ALL, AMD_PTYPE_DC | AMD_PTYPE_MC,
++ 1 << 23, 0x00000000,
++ 1 << 23, 0x00000000 }, /* Erratum 663: [bit 23]=1 */
++
+ { BU_CFG2, AMD_DR_Bx, AMD_PTYPE_ALL,
+ 1 << 29, 0x00000000,
+ 1 << 29, 0x00000000 }, /* For Bx Smash1GPages=1 */
+@@ -72,6 +97,14 @@ static const struct {
+ 0 << 1, 0x00000000,
+ 1 << 1, 0x00000000 }, /* IDX_MATCH_ALL=0 */
+
++ { IC_CFG, AMD_OR_C0, AMD_PTYPE_ALL,
++ 0x00000000, 1 << (39-32),
++ 0x00000000, 1 << (39-32)}, /* C0 or above [DisLoopPredictor]=1 */
++
++ { IC_CFG, AMD_OR_C0, AMD_PTYPE_ALL,
++ 0xf << 1, 0x00000000,
++ 0xf << 1, 0x00000000}, /* C0 or above [DisIcWayFilter]=0xf */
++
+ { BU_CFG, AMD_DR_LT_B3, AMD_PTYPE_ALL,
+ 1 << 21, 0x00000000,
+ 1 << 21, 0x00000000 }, /* Erratum #254 DR B1 BU_CFG[21]=1 */
+@@ -80,19 +113,51 @@ static const struct {
+ 1 << 23, 0x00000000,
+ 1 << 23, 0x00000000 }, /* Erratum #309 BU_CFG[23]=1 */
+
++ { BU_CFG, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0 << 10, 0x00000000,
++ 1 << 10, 0x00000000 }, /* [DcacheAgressivePriority]=0 */
++
+ /* CPUID_EXT_FEATURES */
+- { CPUIDFEATURES, AMD_FAM10_ALL, AMD_PTYPE_DC | AMD_PTYPE_MC,
++ { CPUIDFEATURES, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_DC | AMD_PTYPE_MC,
+ 1 << 28, 0x00000000,
+ 1 << 28, 0x00000000 }, /* [HyperThreadFeatEn]=1 */
+
+- { CPUIDFEATURES, AMD_FAM10_ALL, AMD_PTYPE_DC,
++ { CPUIDFEATURES, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_DC,
+ 0x00000000, 1 << (33-32),
+ 0x00000000, 1 << (33-32) }, /* [ExtendedFeatEn]=1 */
+
++ { DE_CFG, AMD_OR_B2, AMD_PTYPE_ALL,
++ 1 << 10, 0x00000000,
++ 1 << 10, 0x00000000 }, /* Bx [ResyncPredSingleDispDis]=1 */
++
+ { BU_CFG2, AMD_DRBH_Cx, AMD_PTYPE_ALL,
+ 0x00000000, 1 << (35-32),
+ 0x00000000, 1 << (35-32) }, /* Erratum 343 (set to 0 after CAR, in post_cache_as_ram()/model_10xxx_init() ) */
+
++ { BU_CFG3, AMD_OR_B2, AMD_PTYPE_ALL,
++ 0x00000000, 1 << (42-32),
++ 0x00000000, 1 << (42-32)}, /* Bx [PwcDisableWalkerSharing]=1 */
++
++ { BU_CFG3, AMD_OR_C0, AMD_PTYPE_ALL,
++ 1 << 22, 0x00000000,
++ 1 << 22, 0x00000000}, /* C0 or above [PfcDoubleStride]=1 */
++
++ { EX_CFG, AMD_OR_C0, AMD_PTYPE_ALL,
++ 0x00000000, 1 << (54-32),
++ 0x00000000, 1 << (54-32)}, /* C0 or above [LateSbzResync]=1 */
++
++ { LS_CFG2, AMD_OR_C0, AMD_PTYPE_ALL,
++ 1 << 23, 0x00000000,
++ 1 << 23, 0x00000000}, /* C0 or above [DisScbThreshold]=1 */
++
++ { LS_CFG2, AMD_OR_C0, AMD_PTYPE_ALL,
++ 1 << 14, 0x00000000,
++ 1 << 14, 0x00000000}, /* C0 or above [ForceSmcCheckFlowStDis]=1 */
++
++ { LS_CFG2, AMD_OR_C0, AMD_PTYPE_ALL,
++ 1 << 12, 0x00000000,
++ 1 << 12, 0x00000000}, /* C0 or above [ForceBusLockDis]=1 */
++
+ { OSVW_ID_Length, AMD_DR_Bx | AMD_DR_Cx | AMD_DR_Dx, AMD_PTYPE_ALL,
+ 0x00000004, 0x00000000,
+ 0x00000004, 0x00000000}, /* B0 or Above, OSVW_ID_Length is 0004h */
+@@ -105,9 +170,45 @@ static const struct {
+ 0x00000000, 1 << (50-32),
+ 0x00000000, 1 << (50-32)}, /* D0 or Above, RdMmExtCfgQwEn*/
+
++ { BU_CFG2, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000000, 0x0 << (36-32),
++ 0x00000000, 0x3 << (36-32)}, /* [ThrottleNbInterface]=0 */
++
++ { BU_CFG2, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 1 << 10, 0x00000000,
++ 1 << 10, 0x00000000}, /* [VicResyncChkEn]=1 */
++
++ { BU_CFG2, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 1 << 11, 0x00000000,
++ 1 << 11, 0x00000000}, /* Erratum 503: [bit 11]=1 */
++
+ { CPU_ID_EXT_FEATURES_MSR, AMD_DR_Dx, AMD_PTYPE_ALL,
+ 0x00000000, 1 << (51 - 32),
+ 0x00000000, 1 << (51 - 32)}, /* G34_PKG | C32_PKG | S1G4_PKG | ASB2_PKG */
++
++ { CPU_ID_EXT_FEATURES_MSR, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000000, 1 << (56 - 32),
++ 0x00000000, 1 << (56 - 32)}, /* [PerfCtrExtNB]=1 */
++
++ { CPU_ID_EXT_FEATURES_MSR, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000000, 1 << (55 - 32),
++ 0x00000000, 1 << (55 - 32)}, /* [PerfCtrExtCore]=1 */
++
++ { IBS_OP_DATA3, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0 << 16, 0x00000000,
++ 1 << 16, 0x00000000}, /* [IbsDcMabHit]=0 */
++
++ { MC4_MISC0, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000000, 0x1 << (52-32),
++ 0x00000000, 0xf << (52-32)}, /* [LvtOffset]=1 */
++
++ { MC4_MISC1, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000000, 0x1 << (52-32),
++ 0x00000000, 0xf << (52-32)}, /* [LvtOffset]=1 */
++
++ { MC4_MISC2, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000000, 0x1 << (52-32),
++ 0x00000000, 0xf << (52-32)}, /* [LvtOffset]=1 */
+ };
+
+
+@@ -117,37 +218,46 @@ static const struct {
+ static const struct {
+ u8 function;
+ u16 offset;
+- u32 revision;
++ uint64_t revision;
+ u32 platform;
+ u32 data;
+ u32 mask;
+ } fam10_pci_default[] = {
+
+ /* Function 0 - HT Config */
++ { 0, 0x68, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
++ 0x000e0000, 0x000e0000 }, /* [19:17] for 8bit APIC config */
++
++ { 0, 0x68, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
++ 0x00400000, 0x00600000 }, /* [22:21] DsNpReqLmt = 10b */
+
+- { 0, 0x68, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+- 0x004E4800, 0x006E6800 }, /* [19:17] for 8bit APIC config,
+- [14:13] BufPriRel = 2h [11] RspPassPW set,
+- [22:21] DsNpReqLmt = 10b */
++ { 0, 0x68, AMD_FAM10_LT_D, AMD_PTYPE_ALL,
++ 0x00004000, 0x00006000 }, /* [14:13] BufRelPri = 2h */
++
++ { 0, 0x68, (AMD_FAM10_REV_D | AMD_FAM15_ALL), AMD_PTYPE_ALL,
++ 0x00002000, 0x00006000 }, /* [14:13] BufRelPri = 1h */
++
++ { 0, 0x68, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
++ 0x00000800, 0x00000800 }, /* [11] RspPassPW = 1 */
+
+ /* Errata 281 Workaround */
+ { 0, 0x68, (AMD_DR_B0 | AMD_DR_B1),
+ AMD_PTYPE_SVR, 0x00200000, 0x00600000 }, /* [22:21] DsNpReqLmt0 = 01b */
+
+- { 0, 0x84, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 0, 0x84, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
+
+- { 0, 0xA4, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 0, 0xA4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
+
+- { 0, 0xC4, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 0, 0xC4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
+
+- { 0, 0xE4, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 0, 0xE4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
+
+ /* Link Global Retry Control Register */
+- { 0, 0x150, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 0, 0x150, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00073900, 0x00073F00 },
+
+ /* Errata 351
+@@ -172,13 +282,39 @@ static const struct {
+ 0x00000000, 0x00000100 },
+ { 0, 0x18C, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+ 0x00000000, 0x00000100 },
+- { 0, 0x170, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+- 0x00000000, 0x00000100 },
+
+ /* Link Global Extended Control Register */
+ { 0, 0x16C, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+ 0x00000014, 0x0000003F }, /* [15:13] ForceFullT0 = 0b,
+- * Set T0Time 14h per BKDG */
++ * Set T0Time 14h per BKDG */
++
++ { 0, 0x170, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000100, 0x00000100 },
++ { 0, 0x174, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000100, 0x00000100 },
++ { 0, 0x178, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000100, 0x00000100 },
++ { 0, 0x17C, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000100, 0x00000100 },
++ { 0, 0x180, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000100, 0x00000100 },
++ { 0, 0x184, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000100, 0x00000100 },
++ { 0, 0x188, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000100, 0x00000100 },
++ { 0, 0x18C, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000100, 0x00000100 },
++
++ /* Link Global Extended Control Register */
++ { 0, 0x16C, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000014, 0x0000003F }, /* [15:13] ForceFullT0 = 111b,
++ * Set T0Time 26h per BKDG */
++
++ { 0, 0x16C, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x7 << 13, 0x7 << 13 }, /* [15:13] ForceFullT0 = 7h */
++
++ { 0, 0x16C, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x26, 0x3f }, /* [5:0] T0Time = 26h */
+
+
+ /* Function 1 - Map Init */
+@@ -205,10 +341,10 @@ static const struct {
+ /* Function 2 - DRAM Controller */
+
+ /* Function 3 - Misc. Control */
+- { 3, 0x40, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 3, 0x40, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00000100, 0x00000100 }, /* [8] MstrAbrtEn */
+
+- { 3, 0x44, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 3, 0x44, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x4A30005C, 0x4A30005C }, /* [30] SyncOnDramAdrParErrEn = 1,
+ [27] NbMcaToMstCpuEn = 1,
+ [25] DisPciCfgCpuErrRsp = 1,
+@@ -220,8 +356,12 @@ static const struct {
+ [2] SyncOnUcEccEn = 1 */
+
+ /* XBAR buffer settings */
+- { 3, 0x6C, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+- 0x00018052, 0x700780F7 },
++ { 3, 0x6c, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ 0x00018052, 0x700780f7 },
++
++ /* XBAR buffer settings */
++ { 3, 0x6c, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x10010052, 0x700700f7 },
+
+ /* Errata 281 Workaround */
+ { 3, 0x6C, ( AMD_DR_B0 | AMD_DR_B1),
+@@ -233,12 +373,18 @@ static const struct {
+ { 3, 0x70, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+ 0x00041153, 0x777777F7 },
+
++ { 3, 0x70, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x10171155, 0x777777f7 },
++
+ { 3, 0x70, AMD_FAM10_ALL, AMD_PTYPE_UMA,
+ 0x61221151, 0x777777F7 },
+
+ { 3, 0x74, AMD_FAM10_ALL, AMD_PTYPE_UMA,
+ 0x00080101, 0x000F7777 },
+
++ { 3, 0x74, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00172111, 0x77ff7777 },
++
+ { 3, 0x7C, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+ 0x00090914, 0x707FFF1F },
+
+@@ -246,12 +392,18 @@ static const struct {
+ { 3, 0x7C, ( AMD_DR_B0 | AMD_DR_B1),
+ AMD_PTYPE_SVR, 0x00144514, 0x707FFF1F },
+
++ { 3, 0x7C, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x040d0f16, 0x07ffff1f },
++
+ { 3, 0x7C, AMD_FAM10_ALL, AMD_PTYPE_UMA,
+ 0x00070814, 0x007FFF1F },
+
+ { 3, 0x140, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+ 0x00800756, 0x00F3FFFF },
+
++ { 3, 0x140, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00a11755, 0x00f3ffff },
++
+ { 3, 0x140, AMD_FAM10_ALL, AMD_PTYPE_UMA,
+ 0x00C37756, 0x00F3FFFF },
+
+@@ -263,6 +415,9 @@ static const struct {
+ AMD_PTYPE_SVR, 0x00000001, 0x0000000F },
+ /* [3:0] RspTok = 0001b */
+
++ { 3, 0x144, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x00000028, 0x000000ff },
++
+ { 3, 0x148, AMD_FAM10_ALL, AMD_PTYPE_UMA,
+ 0x8000052A, 0xD5FFFFFF },
+
+@@ -270,41 +425,53 @@ static const struct {
+ { 3, 0x80, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+ 0xE6002200, 0xFFFFFFFF },
+
++ /* ACPI Power State Control Reg1 */
++ { 3, 0x80, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0xe20be200, 0xefefef00 },
++
+ /* ACPI Power State Control Reg2 */
+ { 3, 0x84, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+ 0xA0E641E6, 0xFFFFFFFF },
+
++ /* ACPI Power State Control Reg2 */
++ { 3, 0x84, AMD_FAM15_ALL, AMD_PTYPE_ALL,
++ 0x01e200e2, 0xefef00ef },
++
+ { 3, 0xA0, AMD_FAM10_ALL, AMD_PTYPE_MOB | AMD_PTYPE_DSK,
+ 0x00000080, 0x00000080 }, /* [7] PSIVidEnable */
+
+ { 3, 0xA0, AMD_DR_Bx, AMD_PTYPE_ALL,
+ 0x00002800, 0x000003800 }, /* [13:11] PllLockTime = 5 */
+
+- { 3, 0xA0, (AMD_FAM10_ALL & ~(AMD_DR_Bx)), AMD_PTYPE_ALL,
++ { 3, 0xA0, ((AMD_FAM10_ALL | AMD_FAM15_ALL) & ~(AMD_DR_Bx)), AMD_PTYPE_ALL,
+ 0x00000800, 0x000003800 }, /* [13:11] PllLockTime = 1 */
+
+ /* Reported Temp Control Register */
+- { 3, 0xA4, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 3, 0xA4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00000080, 0x00000080 }, /* [7] TempSlewDnEn = 1 */
+
+ /* Clock Power/Timing Control 0 Register */
+- { 3, 0xD4, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 3, 0xD4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0xC0000F00, 0xF0000F00 }, /* [31] NbClkDivApplyAll = 1,
+ [30:28] NbClkDiv = 100b,[11:8] ClkRampHystSel = 1111b */
+
+ /* Clock Power/Timing Control 1 Register */
++ { 3, 0xD8, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
++ 0x03000010, 0x0F000070 }, /* [6:4] VSRampTime = 1,
++ * [27:24] ReConDel = 3 */
++
++ /* Clock Power/Timing Control 1 Register */
+ { 3, 0xD8, AMD_FAM10_ALL, AMD_PTYPE_ALL,
+- 0x03000016, 0x0F000077 }, /* [6:4] VSRampTime = 1,
+- [2:0] VSSlamTime = 6, [27:24] ReConDel = 3 */
++ 0x00000006, 0x00000007 }, /* [2:0] VSSlamTime = 6 */
+
+
+ /* Clock Power/Timing Control 2 Register */
+- { 3, 0xDC, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 3, 0xDC, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00005000, 0x00007000 }, /* [14:12] NbsynPtrAdj = 5 */
+
+
+ /* Extended NB MCA Config Register */
+- { 3, 0x180, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 3, 0x180, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x007003E2, 0x007003E2 }, /* [22:20] = SyncFloodOn_Err = 7,
+ [9] SyncOnUncNbAryEn = 1 ,
+ [8] SyncOnProtEn = 1,
+@@ -319,12 +486,17 @@ static const struct {
+ 0x00400000, 0x00400000 },
+
+ /* L3 Control Register */
+- { 3, 0x1B8, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 3, 0x1b8, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00001000, 0x00001000 }, /* [12] = L3PrivReplEn */
+
+ /* IBS Control Register */
+- { 3, 0x1CC, AMD_FAM10_ALL, AMD_PTYPE_ALL,
++ { 3, 0x1cc, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
+ 0x00000100, 0x00000100 }, /* [8] = LvtOffsetVal */
++
++ /* Erratum 619 - Family 15h Bx
++ * System software should set F5x88[14] to 1b. */
++ { 5, 0x88, AMD_OR_B2, AMD_PTYPE_ALL,
++ 1 << 14, 1 << 14 },
+ };
+
+
+@@ -333,7 +505,7 @@ static const struct {
+ */
+ static const struct {
+ u16 htreg; /* HT Phy Register index */
+- u32 revision;
++ uint64_t revision;
+ u32 platform;
+ u32 linktype;
+ u32 data;
+@@ -442,38 +614,38 @@ static const struct {
+ { 0x530A, AMD_DR_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL,
+ 0x00004400, 0x00006400 }, /* HT_PHY_DLL_REG */
+
+- { 0xCF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
++ { 0xCF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
+ 0x00000000, 0x000000FF }, /* Provide clear setting for logical
+ completeness */
+
+- { 0xDF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
++ { 0xDF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
+ 0x00000000, 0x000000FF }, /* Provide clear setting for logical
+ completeness */
+
+- { 0xCF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
++ { 0xCF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
+ 0x0000006D, 0x000000FF }, /* HT_PHY_HT1_FIFO_PTR_OPT_VALUE */
+
+- { 0xDF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
++ { 0xDF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
+ 0x0000006D, 0x000000FF }, /* HT_PHY_HT1_FIFO_PTR_OPT_VALUE */
+
+ /* Link Phy Receiver Loop Filter Registers */
+- { 0xD1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
++ { 0xD1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
+ 0x08040000, 0x3FFFC000 }, /* [29:22] LfcMax = 20h,
+ [21:14] LfcMin = 10h */
+
+- { 0xC1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
++ { 0xC1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
+ 0x08040000, 0x3FFFC000 }, /* [29:22] LfcMax = 20h,
+ [21:14] LfcMin = 10h */
+
+- { 0xD1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
++ { 0xD1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
+ 0x04020000, 0x3FFFC000 }, /* [29:22] LfcMax = 10h,
+ [21:14] LfcMin = 08h */
+
+- { 0xC1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
++ { 0xC1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
+ 0x04020000, 0x3FFFC000 }, /* [29:22] LfcMax = 10h,
+ [21:14] LfcMin = 08h */
+
+- { 0xC0, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL,
++ { 0xC0, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL,
+ 0x40040000, 0xe01F0000 }, /* [31:29] RttCtl = 02h,
+ [20:16] RttIndex = 04h */
+ };
+diff --git a/src/cpu/amd/family_10h-family_15h/fidvid.c b/src/cpu/amd/family_10h-family_15h/fidvid.c
+index 86e3179..e8e0818 100644
+--- a/src/cpu/amd/family_10h-family_15h/fidvid.c
++++ b/src/cpu/amd/family_10h-family_15h/fidvid.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -43,7 +44,7 @@ Fam10 Bios and Kernel Development Guide #31116, rev 3.48, April 22, 2010
+
+ 3.- 2.4.2.7 dualPlaneOnly(dev)
+
+-4.- 2.4.2.8 applyBoostFIDOffset(dev)
++4.- 2.4.2.8 applyBoostFIDOffset(dev, nodeid)
+
+ 5.- enableNbPState1(dev)
+
+@@ -142,25 +143,33 @@ static void enable_fid_change(u8 fid)
+ }
+ }
+
+-static void applyBoostFIDOffset( device_t dev ) {
+- // BKDG 2.4.2.8
+- // revision E only, but E is apparently not supported yet, therefore untested
+- if ((cpuid_edx(0x80000007) & CPB_MASK)
+- && ((cpuid_ecx(0x80000008) & NC_MASK) ==5) ) {
+- u32 core = get_node_core_id_x().coreid;
+- u32 asymetricBoostThisCore = ((pci_read_config32(dev, 0x10C) >> (core*2))) & 3;
+- msr_t msr = rdmsr(PS_REG_BASE);
+- u32 cpuFid = msr.lo & PS_CPU_FID_MASK;
+- cpuFid = cpuFid + asymetricBoostThisCore;
+- msr.lo &= ~PS_CPU_FID_MASK;
+- msr.lo |= cpuFid ;
+- wrmsr(PS_REG_BASE , msr);
+-
+- }
++static void applyBoostFIDOffset(device_t dev, uint32_t nodeid) {
++ // BKDG 2.4.2.8
++ // Fam10h revision E only, but E is apparently not supported yet, therefore untested
++ if ((cpuid_edx(0x80000007) & CPB_MASK)
++ && ((cpuid_ecx(0x80000008) & NC_MASK) == 5) ) {
++ u32 core = get_node_core_id_x().coreid;
++ u32 asymetricBoostThisCore = ((pci_read_config32(dev, 0x10C) >> (core*2))) & 3;
++ msr_t msr = rdmsr(PS_REG_BASE);
++ u32 cpuFid = msr.lo & PS_CPU_FID_MASK;
++ cpuFid = cpuFid + asymetricBoostThisCore;
++ msr.lo &= ~PS_CPU_FID_MASK;
++ msr.lo |= cpuFid ;
++ wrmsr(PS_REG_BASE , msr);
++ } else if (is_fam15h()) {
++ uint32_t dword = pci_read_config32(NODE_PCI(nodeid, 4), 0x15c);
++ uint8_t boost_count = (dword >> 2) & 0x7;
++ if (boost_count > 0) {
++ /* Enable boost */
++ dword &= ~0x3;
++ dword |= 0x1;
++ pci_write_config32(NODE_PCI(nodeid, 4), 0x15c, dword);
++ }
++ }
+ }
+
+ static void enableNbPState1( device_t dev ) {
+- u32 cpuRev = mctGetLogicalCPUID(0xFF);
++ uint64_t cpuRev = mctGetLogicalCPUID(0xFF);
+ if (cpuRev & AMD_FAM10_C3) {
+ u32 nbPState = (pci_read_config32(dev, 0x1F0) & NB_PSTATE_MASK);
+ if ( nbPState){
+@@ -202,7 +211,7 @@ static u8 setPStateMaxVal( device_t dev ) {
+ static void dualPlaneOnly( device_t dev ) {
+ // BKDG 2.4.2.7
+
+- u32 cpuRev = mctGetLogicalCPUID(0xFF);
++ uint64_t cpuRev = mctGetLogicalCPUID(0xFF);
+ if ((mctGetProcessorPackageType() == AMD_PKGTYPE_AM3_2r2)
+ && (cpuRev & AMD_DR_Cx)) { // should be rev C or rev E but there's no constant for E
+ if ( (pci_read_config32(dev, 0x1FC) & DUAL_PLANE_ONLY_MASK)
+@@ -282,12 +291,16 @@ static void recalculateVsSlamTimeSettingOnCorePre(device_t dev)
+ */
+
+ /* Determine if this is a PVI or SVI system */
+- dtemp = pci_read_config32(dev, 0xA0);
+-
+- if (dtemp & PVI_MODE)
+- pviModeFlag = 1;
+- else
++ if (is_fam15h()) {
+ pviModeFlag = 0;
++ } else {
++ dtemp = pci_read_config32(dev, 0xa0);
++
++ if (dtemp & PVI_MODE)
++ pviModeFlag = 1;
++ else
++ pviModeFlag = 0;
++ }
+
+ /* Get P0's voltage */
+ /* MSRC001_00[68:64] are not programmed yet when called from
+@@ -514,59 +527,67 @@ static void config_nb_syn_ptr_adj(device_t dev, u32 cpuRev) {
+ }
+
+ static void config_acpi_pwr_state_ctrl_regs(device_t dev, u32 cpuRev, u8 procPkg) {
+- /* step 1, chapter 2.4.2.6 of AMD Fam 10 BKDG #31116 Rev 3.48 22.4.2010 */
+- u32 dword;
+- u32 c1= 1;
+- if (cpuRev & (AMD_DR_Bx)) {
+- // will coreboot ever enable cache scrubbing ?
+- // if it does, will it be enough to check the current state
+- // or should we configure for what we'll set up later ?
+- dword = pci_read_config32(dev, 0x58);
+- u32 scrubbingCache = dword &
+- ( (0x1F << 16) // DCacheScrub
+- | (0x1F << 8) ); // L2Scrub
+- if (scrubbingCache) {
+- c1 = 0x80;
+- } else {
+- c1 = 0xA0;
+- }
+- } else { // rev C or later
+- // same doubt as cache scrubbing: ok to check current state ?
+- dword = pci_read_config32(dev, 0xDC);
+- u32 cacheFlushOnHalt = dword & (7 << 16);
+- if (!cacheFlushOnHalt) {
+- c1 = 0x80;
+- }
+- }
+- dword = (c1 << 24) | (0xE641E6);
+- pci_write_config32(dev, 0x84, dword);
+-
+-
+- /* FIXME: BKDG Table 100 says if the link is at a Gen1
+-frequency and the chipset does not support a 10us minimum LDTSTOP
+-assertion time, then { If ASB2 && SVI then smaf001 = F6h else
+-smaf001=87h. } else ... I hardly know what it means or how to check
+-it from here, so I bluntly assume it is false and code here the else,
+-which is easier */
+-
+- u32 smaf001 = 0xE6;
+- if (cpuRev & AMD_DR_Bx ) {
+- smaf001 = 0xA6;
+- } else {
+- #if CONFIG_SVI_HIGH_FREQ
+- if (cpuRev & (AMD_RB_C3 | AMD_DA_C3)) {
+- smaf001 = 0xF6;
+- }
+- #endif
+- }
+- u32 fidvidChange = 0;
+- if (((cpuRev & AMD_DA_Cx) && (procPkg & AMD_PKGTYPE_S1gX))
+- || (cpuRev & AMD_RB_C3) ) {
+- fidvidChange=0x0B;
+- }
+- dword = (0xE6 << 24) | (fidvidChange << 16)
+- | (smaf001 << 8) | 0x81;
+- pci_write_config32(dev, 0x80, dword);
++ if (is_fam15h()) {
++ /* Family 15h BKDG Rev. 3.14 D18F3x80 recommended settings */
++ pci_write_config32(dev, 0x80, 0xe20be281);
++
++ /* Family 15h BKDG Rev. 3.14 D18F3x84 recommended settings */
++ pci_write_config32(dev, 0x84, 0x01e200e2);
++ } else {
++ /* step 1, chapter 2.4.2.6 of AMD Fam 10 BKDG #31116 Rev 3.48 22.4.2010 */
++ u32 dword;
++ u32 c1= 1;
++ if (cpuRev & (AMD_DR_Bx)) {
++ // will coreboot ever enable cache scrubbing ?
++ // if it does, will it be enough to check the current state
++ // or should we configure for what we'll set up later ?
++ dword = pci_read_config32(dev, 0x58);
++ u32 scrubbingCache = dword &
++ ( (0x1F << 16) // DCacheScrub
++ | (0x1F << 8) ); // L2Scrub
++ if (scrubbingCache) {
++ c1 = 0x80;
++ } else {
++ c1 = 0xA0;
++ }
++ } else { // rev C or later
++ // same doubt as cache scrubbing: ok to check current state ?
++ dword = pci_read_config32(dev, 0xDC);
++ u32 cacheFlushOnHalt = dword & (7 << 16);
++ if (!cacheFlushOnHalt) {
++ c1 = 0x80;
++ }
++ }
++ dword = (c1 << 24) | (0xE641E6);
++ pci_write_config32(dev, 0x84, dword);
++
++ /* FIXME: BKDG Table 100 says if the link is at a Gen1
++ * frequency and the chipset does not support a 10us minimum LDTSTOP
++ * assertion time, then { If ASB2 && SVI then smaf001 = F6h else
++ * smaf001=87h. } else ... I hardly know what it means or how to check
++ * it from here, so I bluntly assume it is false and code here the else,
++ * which is easier
++ */
++
++ u32 smaf001 = 0xE6;
++ if (cpuRev & AMD_DR_Bx ) {
++ smaf001 = 0xA6;
++ } else {
++ #if CONFIG_SVI_HIGH_FREQ
++ if (cpuRev & (AMD_RB_C3 | AMD_DA_C3)) {
++ smaf001 = 0xF6;
++ }
++ #endif
++ }
++ u32 fidvidChange = 0;
++ if (((cpuRev & AMD_DA_Cx) && (procPkg & AMD_PKGTYPE_S1gX))
++ || (cpuRev & AMD_RB_C3) ) {
++ fidvidChange=0x0B;
++ }
++ dword = (0xE6 << 24) | (fidvidChange << 16)
++ | (smaf001 << 8) | 0x81;
++ pci_write_config32(dev, 0x80, dword);
++ }
+ }
+
+ static void prep_fid_change(void)
+@@ -583,7 +604,7 @@ static void prep_fid_change(void)
+ for (i = 0; i < nodes; i++) {
+ printk(BIOS_DEBUG, "Prep FID/VID Node:%02x\n", i);
+ dev = NODE_PCI(i, 3);
+- u32 cpuRev = mctGetLogicalCPUID(0xFF) ;
++ uint64_t cpuRev = mctGetLogicalCPUID(0xFF) ;
+ u8 procPkg = mctGetProcessorPackageType();
+
+ setVSRamp(dev);
+@@ -611,7 +632,7 @@ static void prep_fid_change(void)
+ }
+ }
+
+-static void waitCurrentPstate(u32 target_pstate){
++static void waitCurrentPstate(u32 target_pstate) {
+ msr_t initial_msr = rdmsr(TSC_MSR);
+ msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR);
+ msr_t tsc_msr;
+@@ -644,7 +665,7 @@ static void waitCurrentPstate(u32 target_pstate){
+
+ if (pstate_msr.lo != target_pstate) {
+ msr_t limit_msr = rdmsr(0xc0010061);
+- printk(BIOS_ERR, "*** Time out waiting for P-state %01x. Current P-state %01x P-state current limit MSRC001_0061=%02x\n", target_pstate, pstate_msr.lo, limit_msr.lo);
++ printk(BIOS_ERR, "*** Time out waiting for P-state %01x. Current P-state %01x P-state current limit MSRC001_0061=%08x %08x\n", target_pstate, pstate_msr.lo, limit_msr.hi, limit_msr.lo);
+
+ do { // should we just go on instead ?
+ pstate_msr = rdmsr(CUR_PSTATE_MSR);
+@@ -654,6 +675,7 @@ static void waitCurrentPstate(u32 target_pstate){
+
+ static void set_pstate(u32 nonBoostedPState) {
+ msr_t msr;
++ uint8_t skip_wait;
+
+ // Transition P0 for calling core.
+ msr = rdmsr(0xC0010062);
+@@ -661,12 +683,21 @@ static void set_pstate(u32 nonBoostedPState) {
+ msr.lo = nonBoostedPState;
+ wrmsr(0xC0010062, msr);
+
+- /* Wait for P0 to set. */
+- waitCurrentPstate(nonBoostedPState);
+-}
+-
+-
++ if (is_fam15h()) {
++ /* Do not wait for the first (even) set of cores to transition on Family 15h systems */
++ if ((cpuid_ebx(0x00000001) & 0x01000000))
++ skip_wait = 0;
++ else
++ skip_wait = 1;
++ } else {
++ skip_wait = 0;
++ }
+
++ if (!skip_wait) {
++ /* Wait for core to transition to P0 */
++ waitCurrentPstate(nonBoostedPState);
++ }
++}
+
+ static void UpdateSinglePlaneNbVid(void)
+ {
+@@ -756,11 +787,14 @@ static u32 needs_NB_COF_VID_update(void)
+ u8 nodes;
+ u8 i;
+
++ if (is_fam15h())
++ return 0;
++
+ /* If any node has nb_cof_vid_update set all nodes need an update. */
+ nodes = get_nodes();
+ nb_cof_vid_update = 0;
+ for (i = 0; i < nodes; i++) {
+- u32 cpuRev = mctGetLogicalCPUID(i) ;
++ uint64_t cpuRev = mctGetLogicalCPUID(i);
+ u32 nbCofVidUpdateDefined = (cpuRev & (AMD_FAM10_LT_D));
+ if (nbCofVidUpdateDefined
+ && (pci_read_config32(NODE_PCI(i, 3), 0x1FC)
+@@ -784,9 +818,11 @@ static u32 init_fidvid_core(u32 nodeid, u32 coreid)
+ /* Steps 1-6 of BIOS NB COF and VID Configuration
+ * for SVI and Single-Plane PVI Systems. BKDG 2.4.2.9 #31116 rev 3.48
+ */
+-
+ dev = NODE_PCI(nodeid, 3);
+- pvimode = pci_read_config32(dev, PW_CTL_MISC) & PVI_MODE;
++ if (is_fam15h())
++ pvimode = 0;
++ else
++ pvimode = pci_read_config32(dev, PW_CTL_MISC) & PVI_MODE;
+ reg1fc = pci_read_config32(dev, 0x1FC);
+
+ if (nb_cof_vid_update) {
+@@ -798,7 +834,7 @@ static u32 init_fidvid_core(u32 nodeid, u32 coreid)
+ fid_max = fid_max + ((reg1fc & DUAL_PLANE_NB_FID_OFF_MASK ) >> DUAL_PLANE_NB_FID_SHIFT );
+ }
+ /* write newNbVid to P-state Reg's NbVid always if NbVidUpdatedAll=1 */
+- fixPsNbVidBeforeWR(vid_max, coreid,dev,pvimode);
++ fixPsNbVidBeforeWR(vid_max, coreid, dev, pvimode);
+
+ /* fid setup is handled by the BSP at the end. */
+
+@@ -818,7 +854,7 @@ static void init_fidvid_ap(u32 apicid, u32 nodeid, u32 coreid)
+
+ printk(BIOS_DEBUG, "FIDVID on AP: %02x\n", apicid);
+
+- send = init_fidvid_core(nodeid,coreid);
++ send = init_fidvid_core(nodeid, coreid);
+ send |= (apicid << 24); // ap apicid
+
+ // Send signal to BSP about this AP max fid
+@@ -860,7 +896,8 @@ static void init_fidvid_bsp_stage1(u32 ap_apicid, void *gp)
+ while (--loop > 0) {
+ if (lapic_remote_read(ap_apicid, LAPIC_MSG_REG, &readback) != 0)
+ continue;
+- if ((readback & 0x3f) == F10_APSTATE_RESET) {
++ if (((readback & 0x3f) == F10_APSTATE_RESET)
++ || (is_fam15h() && ((readback & 0x3f) == F10_APSTATE_ASLEEP))) {
+ timeout = 0;
+ break; /* target ap is in stage 1 */
+ }
+@@ -948,7 +985,10 @@ static void init_fidvid_stage2(u32 apicid, u32 nodeid)
+ /* If any node has nb_cof_vid_update set all nodes need an update. */
+
+ dev = NODE_PCI(nodeid, 3);
+- pvimode = (pci_read_config32(dev, 0xA0) >> 8) & 1;
++ if (is_fam15h())
++ pvimode = 0;
++ else
++ pvimode = (pci_read_config32(dev, 0xA0) >> 8) & 1;
+ reg1fc = pci_read_config32(dev, 0x1FC);
+ nbvid = (reg1fc >> 7) & 0x7F;
+ NbVidUpdateAll = (reg1fc >> 1) & 1;
+@@ -969,15 +1009,17 @@ static void init_fidvid_stage2(u32 apicid, u32 nodeid)
+ pci_write_config32(dev, 0xA0, dtemp);
+
+ dualPlaneOnly(dev);
+- applyBoostFIDOffset(dev);
++ applyBoostFIDOffset(dev, nodeid);
+ enableNbPState1(dev);
+
+ finalPstateChange();
+
+- /* Set TSC to tick at the P0 ndfid rate */
+- msr = rdmsr(HWCR);
+- msr.lo |= 1 << 24;
+- wrmsr(HWCR, msr);
++ if (!is_fam15h()) {
++ /* Set TSC to tick at the P0 ndfid rate */
++ msr = rdmsr(HWCR);
++ msr.lo |= 1 << 24;
++ wrmsr(HWCR, msr);
++ }
+ }
+
+
+@@ -1011,8 +1053,7 @@ static int init_fidvid_bsp(u32 bsp_apicid, u32 nodes)
+ /* Steps 1-6 of BIOS NB COF and VID Configuration
+ * for SVI and Single-Plane PVI Systems.
+ */
+-
+- fv.common_fid = init_fidvid_core(0,0);
++ fv.common_fid = init_fidvid_core(0, 0);
+
+ print_debug_fv("BSP fid = ", fv.common_fid);
+
+diff --git a/src/cpu/amd/family_10h-family_15h/init_cpus.c b/src/cpu/amd/family_10h-family_15h/init_cpus.c
+index 8de6d25..aced850 100644
+--- a/src/cpu/amd/family_10h-family_15h/init_cpus.c
++++ b/src/cpu/amd/family_10h-family_15h/init_cpus.c
+@@ -30,9 +30,12 @@
+ #include <northbridge/amd/amdfam10/raminit_amdmct.c>
+ #include <reset.h>
+
++#if IS_ENABLED(CONFIG_SET_FIDVID)
+ static void prep_fid_change(void);
+ static void init_fidvid_stage2(u32 apicid, u32 nodeid);
+-void cpuSetAMDMSR(void);
++#endif
++
++void cpuSetAMDMSR(uint8_t node_id);
+
+ #if CONFIG_PCI_IO_CFG_EXT
+ static void set_EnableCf8ExtCfg(void)
+@@ -51,43 +54,38 @@ static void set_EnableCf8ExtCfg(void) { }
+
+ typedef void (*process_ap_t) (u32 apicid, void *gp);
+
+-//core_range = 0 : all cores
+-//core range = 1 : core 0 only
+-//core range = 2 : cores other than core0
++uint32_t get_boot_apic_id(uint8_t node, uint32_t core) {
++ uint32_t ap_apicid;
+
+-static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap,
+- void *gp)
+-{
+- // here assume the OS don't change our apicid
+- u32 ap_apicid;
++ uint32_t nb_cfg_54;
++ uint32_t siblings;
++ uint32_t cores_found;
+
+- u32 nodes;
+- u32 siblings;
+- u32 disable_siblings;
+- u32 cores_found;
+- u32 nb_cfg_54;
+- int i, j;
+- u32 ApicIdCoreIdSize;
++ uint8_t fam15h = 0;
+ uint8_t rev_gte_d = 0;
+ uint8_t dual_node = 0;
+ uint32_t f3xe8;
++ uint32_t family;
++ uint32_t model;
+
+- /* get_nodes define in ht_wrapper.c */
+- nodes = get_nodes();
+-
+- if (!CONFIG_LOGICAL_CPUS ||
+- read_option(multi_core, 0) != 0) { // 0 means multi core
+- disable_siblings = 1;
+- } else {
+- disable_siblings = 0;
+- }
++ uint32_t ApicIdCoreIdSize;
+
+ /* Assume that all node are same stepping, otherwise we can use use
+ nb_cfg_54 from bsp for all nodes */
+ nb_cfg_54 = read_nb_cfg_54();
+ f3xe8 = pci_read_config32(NODE_PCI(0, 3), 0xe8);
+
+- if (cpuid_eax(0x80000001) >= 0x8)
++ family = model = cpuid_eax(0x80000001);
++ model = ((model & 0xf0000) >> 12) | ((model & 0xf0) >> 4);
++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
++
++ if (family >= 0x6f) {
++ /* Family 15h or later */
++ fam15h = 1;
++ nb_cfg_54 = 1;
++ }
++
++ if ((model >= 0x8) || fam15h)
+ /* Revision D or later */
+ rev_gte_d = 1;
+
+@@ -103,10 +101,63 @@ static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap,
+ siblings = 3; //quad core
+ }
+
++ cores_found = get_core_num_in_bsp(node);
++ if (siblings > cores_found)
++ siblings = cores_found;
++
++ if (dual_node) {
++ ap_apicid = 0;
++ if (fam15h) {
++ ap_apicid |= ((node >> 1) & 0x3) << 5; /* Node ID */
++ ap_apicid |= ((node & 0x1) * (siblings + 1)) + core; /* Core ID */
++ } else {
++ if (nb_cfg_54) {
++ ap_apicid |= ((node >> 1) & 0x3) << 4; /* Node ID */
++ ap_apicid |= ((node & 0x1) * (siblings + 1)) + core; /* Core ID */
++ } else {
++ ap_apicid |= node & 0x3; /* Node ID */
++ ap_apicid |= (((node & 0x1) * (siblings + 1)) + core) << 4; /* Core ID */
++ }
++ }
++ } else {
++ if (fam15h) {
++ ap_apicid = (node * (siblings + 1)) + core;
++ } else {
++ ap_apicid = node * (nb_cfg_54 ? (siblings + 1) : 1) +
++ core * (nb_cfg_54 ? 1 : 64);
++ }
++ }
++
++ return ap_apicid;
++}
++
++//core_range = 0 : all cores
++//core range = 1 : core 0 only
++//core range = 2 : cores other than core0
++
++static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap,
++ void *gp)
++{
++ // here assume the OS don't change our apicid
++ u32 ap_apicid;
++
++ u32 nodes;
++ u32 disable_siblings;
++ u32 cores_found;
++ int i, j;
++
++ /* get_nodes define in ht_wrapper.c */
++ nodes = get_nodes();
++
++ if (!CONFIG_LOGICAL_CPUS ||
++ read_option(multi_core, 0) != 0) { // 0 means multi core
++ disable_siblings = 1;
++ } else {
++ disable_siblings = 0;
++ }
++
+ for (i = 0; i < nodes; i++) {
+ cores_found = get_core_num_in_bsp(i);
+- if (siblings > cores_found)
+- siblings = cores_found;
+
+ u32 jstart, jend;
+
+@@ -123,21 +174,7 @@ static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap,
+ }
+
+ for (j = jstart; j <= jend; j++) {
+- if (dual_node) {
+- ap_apicid = 0;
+- if (nb_cfg_54) {
+- ap_apicid |= ((i >> 1) & 0x3) << 4; /* Node ID */
+- ap_apicid |= ((i & 0x1) * (siblings + 1)) + j; /* Core ID */
+- } else {
+- ap_apicid |= i & 0x3; /* Node ID */
+- ap_apicid |= (((i & 0x1) * (siblings + 1)) + j) << 4; /* Core ID */
+- }
+- } else {
+- ap_apicid =
+- i * (nb_cfg_54 ? (siblings + 1) : 1) +
+- j * (nb_cfg_54 ? 1 : 64);
+- }
+-
++ ap_apicid = get_boot_apic_id(i, j);
+
+ #if CONFIG_ENABLE_APIC_EXT_ID && (CONFIG_APIC_ID_OFFSET > 0)
+ #if !CONFIG_LIFT_BSP_APIC_ID
+@@ -197,7 +234,7 @@ void print_apicid_nodeid_coreid(u32 apicid, struct node_core_id id,
+ apicid, id.nodeid, id.coreid);
+ }
+
+-static u32 wait_cpu_state(u32 apicid, u32 state)
++uint32_t wait_cpu_state(uint32_t apicid, uint32_t state, uint32_t state2)
+ {
+ u32 readback = 0;
+ u32 timeout = 1;
+@@ -205,7 +242,7 @@ static u32 wait_cpu_state(u32 apicid, u32 state)
+ while (--loop > 0) {
+ if (lapic_remote_read(apicid, LAPIC_MSG_REG, &readback) != 0)
+ continue;
+- if ((readback & 0x3f) == state || (readback & 0x3f) == F10_APSTATE_RESET) {
++ if ((readback & 0x3f) == state || (readback & 0x3f) == state2 || (readback & 0x3f) == F10_APSTATE_RESET) {
+ timeout = 0;
+ break; //target cpu is in stage started
+ }
+@@ -222,7 +259,7 @@ static u32 wait_cpu_state(u32 apicid, u32 state)
+ static void wait_ap_started(u32 ap_apicid, void *gp)
+ {
+ u32 timeout;
+- timeout = wait_cpu_state(ap_apicid, F10_APSTATE_STARTED);
++ timeout = wait_cpu_state(ap_apicid, F10_APSTATE_STARTED, F10_APSTATE_ASLEEP);
+ printk(BIOS_DEBUG, "* AP %02x", ap_apicid);
+ if (timeout) {
+ printk(BIOS_DEBUG, " timed out:%08x\n", timeout);
+@@ -258,16 +295,27 @@ static void enable_apic_ext_id(u32 node)
+ pci_write_config32(NODE_HT(node), 0x68, val);
+ }
+
+-static void STOP_CAR_AND_CPU(void)
++static void STOP_CAR_AND_CPU(uint8_t skip_sharedc_config, uint32_t apicid)
+ {
+ msr_t msr;
++ uint32_t family;
++
++ family = amd_fam1x_cpu_family(); // inline
++
++ if (family < 0x6f) {
++ /* Family 10h or earlier */
++
++ /* Disable L2 IC to L3 connection (Only for CAR) */
++ msr = rdmsr(BU_CFG2);
++ msr.lo &= ~(1 << ClLinesToNbDis);
++ wrmsr(BU_CFG2, msr);
++ }
+
+- /* Disable L2 IC to L3 connection (Only for CAR) */
+- msr = rdmsr(BU_CFG2);
+- msr.lo &= ~(1 << ClLinesToNbDis);
+- wrmsr(BU_CFG2, msr);
++ disable_cache_as_ram(skip_sharedc_config); // inline
++
++ /* Mark the core as sleeping */
++ lapic_write(LAPIC_MSG_REG, (apicid << 24) | F10_APSTATE_ASLEEP);
+
+- disable_cache_as_ram(); // inline
+ /* stop all cores except node0/core0 the bsp .... */
+ stop_this_cpu();
+ }
+@@ -276,6 +324,7 @@ static u32 init_cpus(u32 cpu_init_detectedx, struct sys_info *sysinfo)
+ {
+ u32 bsp_apicid = 0;
+ u32 apicid;
++ uint8_t set_mtrrs;
+ struct node_core_id id;
+
+ /* Please refer to the calculations and explaination in cache_as_ram.inc before modifying these values */
+@@ -362,7 +411,7 @@ static u32 init_cpus(u32 cpu_init_detectedx, struct sys_info *sysinfo)
+ */
+ update_microcode(cpuid_eax(1));
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(id.nodeid);
+
+ #if CONFIG_SET_FIDVID
+ #if CONFIG_LOGICAL_CPUS && CONFIG_SET_FIDVID_CORE0_ONLY
+@@ -385,10 +434,29 @@ static u32 init_cpus(u32 cpu_init_detectedx, struct sys_info *sysinfo)
+ }
+ #endif
+
++ if (is_fam15h()) {
++ /* core 1 on node 0 is special; to avoid corrupting the
++ * BSP do not alter MTRRs on that core */
++ if (apicid == 1)
++ set_mtrrs = 0;
++ else
++ set_mtrrs = !!(apicid & 0x1);
++ } else {
++ set_mtrrs = 1;
++ }
++
+ /* AP is ready, configure MTRRs and go to sleep */
+- set_var_mtrr(0, 0x00000000, CONFIG_RAMTOP, MTRR_TYPE_WRBACK);
++ if (set_mtrrs)
++ set_var_mtrr(0, 0x00000000, CONFIG_RAMTOP, MTRR_TYPE_WRBACK);
+
+- STOP_CAR_AND_CPU();
++ printk(BIOS_DEBUG, "Disabling CAR on AP %02x\n", apicid);
++ if (is_fam15h()) {
++ /* Only modify the MSRs on the odd cores (the last cores to finish booting) */
++ STOP_CAR_AND_CPU(!set_mtrrs, apicid);
++ } else {
++ /* Modify MSRs on all cores */
++ STOP_CAR_AND_CPU(0, apicid);
++ }
+
+ printk(BIOS_DEBUG,
+ "\nAP %02x should be halted but you are reading this....\n",
+@@ -496,7 +564,7 @@ static void setup_remote_node(u8 node)
+ }
+ #endif /* CONFIG_MAX_PHYSICAL_CPUS > 1 */
+
+-static void AMD_Errata281(u8 node, u32 revision, u32 platform)
++static void AMD_Errata281(u8 node, uint64_t revision, u32 platform)
+ {
+ /* Workaround for Transaction Scheduling Conflict in
+ * Northbridge Cross Bar. Implement XCS Token adjustment
+@@ -794,7 +862,7 @@ static void AMD_SetHtPhyRegister(u8 node, u8 link, u8 entry)
+ } while (!(val & HTPHY_IS_COMPLETE_MASK));
+ }
+
+-void cpuSetAMDMSR(void)
++void cpuSetAMDMSR(uint8_t node_id)
+ {
+ /* This routine loads the CPU with default settings in fam10_msr_default
+ * table . It must be run after Cache-As-RAM has been enabled, and
+@@ -804,7 +872,8 @@ void cpuSetAMDMSR(void)
+ */
+ msr_t msr;
+ u8 i;
+- u32 revision, platform;
++ u32 platform;
++ uint64_t revision;
+
+ printk(BIOS_DEBUG, "cpuSetAMDMSR ");
+
+@@ -824,6 +893,49 @@ void cpuSetAMDMSR(void)
+ }
+ AMD_Errata298();
+
++ if (revision & AMD_FAM15_ALL) {
++ uint32_t f5x80;
++ uint8_t enabled;
++ uint8_t compute_unit_count = 0;
++ f5x80 = pci_read_config32(NODE_PCI(node_id, 5), 0x80);
++ enabled = f5x80 & 0xf;
++ if (enabled == 0x1)
++ compute_unit_count = 1;
++ if (enabled == 0x3)
++ compute_unit_count = 2;
++ if (enabled == 0x7)
++ compute_unit_count = 3;
++ if (enabled == 0xf)
++ compute_unit_count = 4;
++ msr = rdmsr(BU_CFG2);
++ msr.lo &= ~(0x3 << 6); /* ThrottleNbInterface[1:0] */
++ msr.lo |= (((compute_unit_count - 1) & 0x3) << 6);
++ wrmsr(BU_CFG2, msr);
++ }
++
++ /* Revision C0 and above */
++ if (revision & AMD_OR_C0) {
++ uint32_t f3x1fc = pci_read_config32(NODE_PCI(node_id, 3), 0x1fc);
++ msr = rdmsr(FP_CFG);
++ msr.hi &= ~(0x7 << (42-32)); /* DiDtCfg4 */
++ msr.hi |= (((f3x1fc >> 17) & 0x7) << (42-32));
++ msr.hi &= ~(0x1 << (41-32)); /* DiDtCfg5 */
++ msr.hi |= (((f3x1fc >> 22) & 0x1) << (41-32));
++ msr.hi &= ~(0x1 << (40-32)); /* DiDtCfg3 */
++ msr.hi |= (((f3x1fc >> 16) & 0x1) << (40-32));
++ msr.hi &= ~(0x7 << (32-32)); /* DiDtCfg1 (1) */
++ msr.hi |= (((f3x1fc >> 11) & 0x7) << (32-32));
++ msr.lo &= ~(0x1f << 27); /* DiDtCfg1 (2) */
++ msr.lo |= (((f3x1fc >> 6) & 0x1f) << 27);
++ msr.lo &= ~(0x3 << 25); /* DiDtCfg2 */
++ msr.lo |= (((f3x1fc >> 14) & 0x3) << 25);
++ msr.lo &= ~(0x1f << 18); /* DiDtCfg0 */
++ msr.lo |= (((f3x1fc >> 1) & 0x1f) << 18);
++ msr.lo &= ~(0x1 << 16); /* DiDtMode */
++ msr.lo |= ((f3x1fc & 0x1) << 16);
++ wrmsr(FP_CFG, msr);
++ }
++
+ printk(BIOS_DEBUG, " done\n");
+ }
+
+@@ -835,9 +947,10 @@ static void cpuSetAMDPCI(u8 node)
+ * that it is run for the first core on each node
+ */
+ u8 i, j;
+- u32 revision, platform;
++ u32 platform;
+ u32 val;
+ u8 offset;
++ uint64_t revision;
+
+ printk(BIOS_DEBUG, "cpuSetAMDPCI %02d", node);
+
+@@ -899,6 +1012,7 @@ static void cpuSetAMDPCI(u8 node)
+ }
+
+ #ifdef UNUSED_CODE
++/* Clearing the MCA registers is apparently handled in the ramstage CPU Function 3 driver */
+ static void cpuInitializeMCA(void)
+ {
+ /* Clears Machine Check Architecture (MCA) registers, which power on
+diff --git a/src/cpu/amd/family_10h-family_15h/model_10xxx_init.c b/src/cpu/amd/family_10h-family_15h/model_10xxx_init.c
+index b942c1a..8a61f13 100644
+--- a/src/cpu/amd/family_10h-family_15h/model_10xxx_init.c
++++ b/src/cpu/amd/family_10h-family_15h/model_10xxx_init.c
+@@ -39,6 +39,23 @@
+
+ #define MCI_STATUS 0x401
+
++static inline uint8_t is_fam15h(void)
++{
++ uint8_t fam15h = 0;
++ uint32_t family;
++
++ family = cpuid_eax(0x80000001);
++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
++
++ if (family >= 0x6f)
++ /* Family 15h or later */
++ fam15h = 1;
++
++ return fam15h;
++}
++
++static volatile uint8_t fam15h_startup_flags[MAX_NODES_SUPPORTED][MAX_CORES_SUPPORTED] = {{ 0 }};
++
+ static void model_10xxx_init(device_t dev)
+ {
+ u8 i;
+@@ -47,13 +64,44 @@ static void model_10xxx_init(device_t dev)
+ #if CONFIG_LOGICAL_CPUS
+ u32 siblings;
+ #endif
++ uint8_t delay_start;
+
+ id = get_node_core_id(read_nb_cfg_54()); /* nb_cfg_54 can not be set */
+ printk(BIOS_DEBUG, "nodeid = %02d, coreid = %02d\n", id.nodeid, id.coreid);
+
++ if (is_fam15h())
++ delay_start = !!(id.coreid & 0x1);
++ else
++ delay_start = 0;
++
+ /* Turn on caching if we haven't already */
+ x86_enable_cache();
+- amd_setup_mtrrs();
++
++ if (!delay_start) {
++ /* Initialize all variable MTRRs except the first pair.
++ * This prevents Linux from having to correct an inconsistent
++ * MTRR setup, which would crash Family 15h CPUs due to the
++ * compute unit structure sharing MTRR MSRs between AP cores.
++ */
++ msr.hi = 0x00000000;
++ msr.lo = 0x00000000;
++
++ disable_cache();
++
++ for (i = 0x2; i < 0x10; i++) {
++ wrmsr(0x00000200 | i, msr);
++ }
++
++ enable_cache();
++
++ /* Set up other MTRRs */
++ amd_setup_mtrrs();
++ } else {
++ while (!fam15h_startup_flags[id.nodeid][id.coreid - 1]) {
++ /* Wait for CU first core startup */
++ }
++ }
++
+ x86_mtrr_check();
+
+ disable_cache();
+@@ -88,17 +136,24 @@ static void model_10xxx_init(device_t dev)
+ printk(BIOS_DEBUG, "siblings = %02d, ", siblings);
+ #endif
+
+- /* DisableCf8ExtCfg */
++ /* Disable Cf8ExtCfg */
+ msr = rdmsr(NB_CFG_MSR);
+ msr.hi &= ~(1 << (46 - 32));
+ wrmsr(NB_CFG_MSR, msr);
+
+- msr = rdmsr(BU_CFG2_MSR);
+- /* Clear ClLinesToNbDis */
+- msr.lo &= ~(1 << 15);
+- /* Clear bit 35 as per Erratum 343 */
+- msr.hi &= ~(1 << (35-32));
+- wrmsr(BU_CFG2_MSR, msr);
++ if (is_fam15h()) {
++ msr = rdmsr(BU_CFG3_MSR);
++ /* Set CombineCr0Cd */
++ msr.hi |= (1 << (49-32));
++ wrmsr(BU_CFG3_MSR, msr);
++ } else {
++ msr = rdmsr(BU_CFG2_MSR);
++ /* Clear ClLinesToNbDis */
++ msr.lo &= ~(1 << 15);
++ /* Clear bit 35 as per Erratum 343 */
++ msr.hi &= ~(1 << (35-32));
++ wrmsr(BU_CFG2_MSR, msr);
++ }
+
+ if (IS_ENABLED(CONFIG_HAVE_SMI_HANDLER)) {
+ printk(BIOS_DEBUG, "Initializing SMM ASeg memory\n");
+@@ -131,6 +186,7 @@ static void model_10xxx_init(device_t dev)
+ msr.lo |= (1 << 0);
+ wrmsr(HWCR_MSR, msr);
+
++ fam15h_startup_flags[id.nodeid][id.coreid] = 1;
+ }
+
+ static struct device_operations cpu_dev_ops = {
+@@ -147,15 +203,17 @@ static struct cpu_device_id cpu_table[] = {
+ { X86_VENDOR_AMD, 0x100f22 },
+ { X86_VENDOR_AMD, 0x100f23 },
+ { X86_VENDOR_AMD, 0x100f40 }, /* RB-C0 */
+- { X86_VENDOR_AMD, 0x100F42 }, /* RB-C2 */
+- { X86_VENDOR_AMD, 0x100F43 }, /* RB-C3 */
+- { X86_VENDOR_AMD, 0x100F52 }, /* BL-C2 */
+- { X86_VENDOR_AMD, 0x100F62 }, /* DA-C2 */
+- { X86_VENDOR_AMD, 0x100F63 }, /* DA-C3 */
+- { X86_VENDOR_AMD, 0x100F80 }, /* HY-D0 */
+- { X86_VENDOR_AMD, 0x100F81 }, /* HY-D1 */
+- { X86_VENDOR_AMD, 0x100F91 }, /* HY-D1 */
+- { X86_VENDOR_AMD, 0x100FA0 }, /* PH-E0 */
++ { X86_VENDOR_AMD, 0x100f42 }, /* RB-C2 */
++ { X86_VENDOR_AMD, 0x100f43 }, /* RB-C3 */
++ { X86_VENDOR_AMD, 0x100f52 }, /* BL-C2 */
++ { X86_VENDOR_AMD, 0x100f62 }, /* DA-C2 */
++ { X86_VENDOR_AMD, 0x100f63 }, /* DA-C3 */
++ { X86_VENDOR_AMD, 0x100f80 }, /* HY-D0 */
++ { X86_VENDOR_AMD, 0x100f81 }, /* HY-D1 */
++ { X86_VENDOR_AMD, 0x100f91 }, /* HY-D1 */
++ { X86_VENDOR_AMD, 0x100fa0 }, /* PH-E0 */
++ { X86_VENDOR_AMD, 0x600f12 }, /* OR-B2 */
++ { X86_VENDOR_AMD, 0x600f20 }, /* OR-C0 */
+ { 0, 0 },
+ };
+
+diff --git a/src/cpu/amd/family_10h-family_15h/powernow_acpi.c b/src/cpu/amd/family_10h-family_15h/powernow_acpi.c
+index 98ef08a..84e5514 100644
+--- a/src/cpu/amd/family_10h-family_15h/powernow_acpi.c
++++ b/src/cpu/amd/family_10h-family_15h/powernow_acpi.c
+@@ -74,8 +74,7 @@ static void write_pstates_for_core(u8 pstate_num, u16 *pstate_feq, u32 *pstate_p
+ /* Revision C or greater single-link processor */
+ cpuid1 = cpuid(0x80000008);
+ acpigen_write_PSD_package(0, (cpuid1.ecx & 0xff) + 1, SW_ALL);
+- }
+- else {
++ } else {
+ /* Find the local APIC ID for the specified core ID */
+ struct device* cpu;
+ int cpu_index = 0;
+@@ -99,7 +98,9 @@ static void write_pstates_for_core(u8 pstate_num, u16 *pstate_feq, u32 *pstate_p
+ }
+
+ /*
+-* For details of this algorithm, please refer to the BDKG 3.62 page 69
++* For details of this algorithm, please refer to:
++* Family 10h BKDG 3.62 page 69
++* Family 15h BKDG 3.14 page 74
+ *
+ * WARNING: The core count algorithm below assumes that all processors
+ * are identical, with the same number of active cores. While the BKDG
+@@ -149,6 +150,13 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP)
+ uint8_t node_count;
+ uint8_t cores_per_node;
+ uint8_t total_core_count;
++ uint8_t fam15h;
++ uint8_t fam10h_rev_e = 0;
++
++ /* Detect Revision E processors via method used in fidvid.c */
++ if ((cpuid_edx(0x80000007) & CPB_MASK)
++ && ((cpuid_ecx(0x80000008) & NC_MASK) == 5))
++ fam10h_rev_e = 1;
+
+ /*
+ * Based on the CPU socket type,cmp_cap and pwr_lmt , get the power limit.
+@@ -156,11 +164,17 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP)
+ * cmp_cap : 0x0 SingleCore ; 0x1 DualCore ; 0x2 TripleCore ; 0x3 QuadCore ; 0x4 QuintupleCore ; 0x5 HexCore
+ */
+ printk(BIOS_INFO, "Pstates algorithm ...\n");
++ fam15h = !!(mctGetLogicalCPUID(0) & AMD_FAM15_ALL);
+ /* Get number of cores */
+- dtemp = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 3)), 0xE8);
+- cmp_cap = (dtemp & 0x3000) >> 12;
+- if (mctGetLogicalCPUID(0) & AMD_FAM10_REV_D) /* revision D */
+- cmp_cap |= (dtemp & 0x8000) >> 13;
++ if (fam15h) {
++ cmp_cap = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 5)), 0x84) & 0xff;
++ } else {
++ dtemp = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 3)), 0xe8);
++ cmp_cap = (dtemp & 0x3000) >> 12;
++ if (mctGetLogicalCPUID(0) & (AMD_FAM10_REV_D | AMD_FAM15_ALL)) /* revision D or higher */
++ cmp_cap |= (dtemp & 0x8000) >> 13;
++ }
++
+ /* Get number of nodes */
+ dtemp = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 0)), 0x60);
+ node_count = ((dtemp & 0x70) >> 4) + 1;
+@@ -169,6 +183,14 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP)
+ /* Compute total number of cores installed in system */
+ total_core_count = cores_per_node * node_count;
+
++ /* Get number of boost states */
++ uint8_t boost_count = 0;
++ dtemp = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 4)), 0x15c);
++ if (fam10h_rev_e)
++ boost_count = (dtemp >> 2) & 0x1;
++ else if (mctGetLogicalCPUID(0) & AMD_FAM15_ALL)
++ boost_count = (dtemp >> 2) & 0x7;
++
+ Pstate_num = 0;
+
+ /* See if the CPUID(0x80000007) returned EDX[7]==1b */
+@@ -205,7 +227,7 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP)
+
+ /* Get PSmax's index */
+ msr = rdmsr(0xC0010061);
+- Pstate_max = (uint8_t) ((msr.lo >> PS_MAX_VAL_SHFT) & BIT_MASK_3);
++ Pstate_max = (uint8_t) ((msr.lo >> PS_MAX_VAL_SHFT) & ((fam15h)?BIT_MASK_7:BIT_MASK_3));
+
+ /* Determine if all enabled Pstates have the same fidvid */
+ uint8_t i;
+@@ -219,10 +241,14 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP)
+ }
+ }
+
++ /* Family 15h uses slightly different PSmax numbering */
++ if (fam15h)
++ Pstate_max++;
++
+ /* Populate tables with all Pstate information */
+ for (Pstate_num = 0; Pstate_num < Pstate_max; Pstate_num++) {
+ /* Get power state information */
+- msr = rdmsr(0xC0010064 + Pstate_num);
++ msr = rdmsr(0xC0010064 + Pstate_num + boost_count);
+ cpufid = (msr.lo & 0x3f);
+ cpudid = (msr.lo & 0x1c0) >> 6;
+ cpuvid = (msr.lo & 0xfe00) >> 9;
+@@ -232,12 +258,10 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP)
+ if (pviModeFlag) {
+ if (cpuvid >= 0x20) {
+ core_voltage = 7625 - (((cpuvid - 0x20) * 10000) / 80);
+- }
+- else {
++ } else {
+ core_voltage = 15500 - ((cpuvid * 10000) / 40);
+ }
+- }
+- else {
++ } else {
+ cpuvid = cpuvid & 0x7f;
+ if (cpuvid >= 0x7c)
+ core_voltage = 0;
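+/* Note on the boost handling above: when boosted P-states are present, the
+ * hardware P-state MSRs starting at 0xC0010064 describe the boost states
+ * first, so each table entry is read from 0xC0010064 + Pstate_num +
+ * boost_count and the boosted states are left out of the generated ACPI
+ * objects. boost_count comes from D18F4x15C (at most 1 state on Family 10h
+ * revision E, up to 7 on Family 15h). */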
+diff --git a/src/cpu/amd/family_10h-family_15h/processor_name.c b/src/cpu/amd/family_10h-family_15h/processor_name.c
+index 12c45c9..fbd0452 100644
+--- a/src/cpu/amd/family_10h-family_15h/processor_name.c
++++ b/src/cpu/amd/family_10h-family_15h/processor_name.c
+@@ -33,6 +33,10 @@
+ #include <cpu/amd/mtrr.h>
+ #include <cpu/cpu.h>
+ #include <cpu/amd/model_10xxx_rev.h>
++#include <device/device.h>
++#include <device/pci.h>
++#include <device/pnp.h>
++#include <device/pci_ops.h>
+
+ /* The maximum length of CPU names is 48 bytes, including the final NULL byte.
+ * If you change these names your BIOS will _NOT_ pass the AMD validation and
+@@ -212,104 +216,138 @@ static int strcpymax(char *dst, const char *src, int buflen)
+ return i;
+ }
+
++#define NAME_STRING_MAXLEN 48
+
+ int init_processor_name(void)
+ {
+- /* variable names taken from fam10 revision guide for clarity */
+- u32 BrandId; /* CPUID Fn8000_0001_EBX */
+- u8 String1; /* BrandID[14:11] */
+- u8 String2; /* BrandID[3:0] */
+- u8 Model; /* BrandID[10:4] */
+- u8 Pg; /* BrandID[15] */
+- u8 PkgTyp; /* BrandID[31:28] */
+- u8 NC; /* CPUID Fn8000_0008_ECX */
+- const char *processor_name_string = unknown;
+- char program_string[48];
+- u32 *p_program_string = (u32 *)program_string;
+ msr_t msr;
+- int i, j = 0, str2_checkNC = 1;
+- const struct str_s *str, *str2;
++ ssize_t i;
++ char program_string[NAME_STRING_MAXLEN];
++ u32 *p_program_string = (u32 *)program_string;
++ uint8_t fam15h = 0;
++ uint32_t family;
+
++ family = cpuid_eax(0x80000001);
++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
+
+- /* Find out which CPU brand it is */
+- BrandId = cpuid_ebx(0x80000001);
+- String1 = (u8)((BrandId >> 11) & 0x0F);
+- String2 = (u8)((BrandId >> 0) & 0x0F);
+- Model = (u8)((BrandId >> 4) & 0x7F);
+- Pg = (u8)((BrandId >> 15) & 0x01);
+- PkgTyp = (u8)((BrandId >> 28) & 0x0F);
+- NC = (u8)(cpuid_ecx(0x80000008) & 0xFF);
++ if (family >= 0x6f)
++ /* Family 15h or later */
++ fam15h = 1;
+
+ /* null the string */
+ memset(program_string, 0, sizeof(program_string));
+
+- if (!Model) {
+- processor_name_string = Pg ? thermal : sample;
+- goto done;
+- }
+-
+- switch (PkgTyp) {
+- case 0: /* F1207 */
+- str = String1_socket_F;
+- str2 = String2_socket_F;
+- str2_checkNC = 0;
+- break;
+- case 1: /* AM2 */
+- str = String1_socket_AM2;
+- str2 = String2_socket_AM2;
+- break;
+- case 3: /* G34 */
+- str = String1_socket_G34;
+- str2 = String2_socket_G34;
+- str2_checkNC = 0;
+- break;
+- case 5: /* C32 */
+- str = String1_socket_C32;
+- str2 = String2_socket_C32;
+- break;
+- default:
+- goto done;
+- }
++ if (fam15h) {
++ /* Family 15h or later */
++ uint32_t dword;
++ device_t cpu_fn5_dev = dev_find_slot(0, PCI_DEVFN(0x18, 5));
++ pci_write_config32(cpu_fn5_dev, 0x194, 0);
++ dword = pci_read_config32(cpu_fn5_dev, 0x198);
++ if (dword == 0) {
++ strcpymax(program_string, sample, sizeof(program_string));
++ } else {
++ /* Assemble the string from PCI configuration register contents */
++ for (i = 0; i < 12; i++) {
++ pci_write_config32(cpu_fn5_dev, 0x194, i);
++ p_program_string[i] = pci_read_config32(cpu_fn5_dev, 0x198);
++ }
++
++ /* Correctly place the null terminator */
++ for (i = (NAME_STRING_MAXLEN - 2); i > 0; i--) {
++ if (program_string[i] != 0x20)
++ break;
++ }
++ program_string[i + 1] = 0;
++ }
++ } else {
++ /* variable names taken from fam10 revision guide for clarity */
++ u32 BrandId; /* CPUID Fn8000_0001_EBX */
++ u8 String1; /* BrandID[14:11] */
++ u8 String2; /* BrandID[3:0] */
++ u8 Model; /* BrandID[10:4] */
++ u8 Pg; /* BrandID[15] */
++ u8 PkgTyp; /* BrandID[31:28] */
++ u8 NC; /* CPUID Fn8000_0008_ECX */
++ const char *processor_name_string = unknown;
++ int j = 0, str2_checkNC = 1;
++ const struct str_s *str, *str2;
++
++ /* Find out which CPU brand it is */
++ BrandId = cpuid_ebx(0x80000001);
++ String1 = (u8)((BrandId >> 11) & 0x0F);
++ String2 = (u8)((BrandId >> 0) & 0x0F);
++ Model = (u8)((BrandId >> 4) & 0x7F);
++ Pg = (u8)((BrandId >> 15) & 0x01);
++ PkgTyp = (u8)((BrandId >> 28) & 0x0F);
++ NC = (u8)(cpuid_ecx(0x80000008) & 0xFF);
++
++ if (!Model) {
++ processor_name_string = Pg ? thermal : sample;
++ goto done;
++ }
+
+- /* String1 */
+- for (i = 0; str[i].value; i++) {
+- if ((str[i].Pg == Pg) &&
+- (str[i].NC == NC) &&
+- (str[i].String == String1)) {
+- processor_name_string = str[i].value;
++ switch (PkgTyp) {
++ case 0: /* F1207 */
++ str = String1_socket_F;
++ str2 = String2_socket_F;
++ str2_checkNC = 0;
++ break;
++ case 1: /* AM2 */
++ str = String1_socket_AM2;
++ str2 = String2_socket_AM2;
++ break;
++ case 3: /* G34 */
++ str = String1_socket_G34;
++ str2 = String2_socket_G34;
++ str2_checkNC = 0;
++ break;
++ case 5: /* C32 */
++ str = String1_socket_C32;
++ str2 = String2_socket_C32;
+ break;
++ default:
++ goto done;
+ }
+- }
+
+- if (!str[i].value)
+- goto done;
++ /* String1 */
++ for (i = 0; str[i].value; i++) {
++ if ((str[i].Pg == Pg) &&
++ (str[i].NC == NC) &&
++ (str[i].String == String1)) {
++ processor_name_string = str[i].value;
++ break;
++ }
++ }
+
+- j = strcpymax(program_string, processor_name_string,
+- sizeof(program_string));
++ if (!str[i].value)
++ goto done;
+
+- /* Translate Model from 01-99 to ASCII and put it on the end.
+- * Numbers less than 10 should include a leading zero, e.g., 09.*/
+- if (Model < 100 && j < sizeof(program_string) - 2) {
+- program_string[j++] = (Model / 10) + '0';
+- program_string[j++] = (Model % 10) + '0';
+- }
++ j = strcpymax(program_string, processor_name_string,
++ sizeof(program_string));
+
+- processor_name_string = unknown2;
+-
+- /* String 2 */
+- for(i = 0; str2[i].value; i++) {
+- if ((str2[i].Pg == Pg) &&
+- ((str2[i].NC == NC) || !str2_checkNC) &&
+- (str2[i].String == String2)) {
+- processor_name_string = str2[i].value;
+- break;
++ /* Translate Model from 01-99 to ASCII and put it on the end.
++ * Numbers less than 10 should include a leading zero, e.g., 09.*/
++ if (Model < 100 && j < sizeof(program_string) - 2) {
++ program_string[j++] = (Model / 10) + '0';
++ program_string[j++] = (Model % 10) + '0';
+ }
+- }
+
++ processor_name_string = unknown2;
++
++ /* String 2 */
++ for(i = 0; str2[i].value; i++) {
++ if ((str2[i].Pg == Pg) &&
++ ((str2[i].NC == NC) || !str2_checkNC) &&
++ (str2[i].String == String2)) {
++ processor_name_string = str2[i].value;
++ break;
++ }
++ }
+
+-done:
+- strcpymax(&program_string[j], processor_name_string,
+- sizeof(program_string) - j);
++ done:
++ strcpymax(&program_string[j], processor_name_string,
++ sizeof(program_string) - j);
++ }
+
+ printk(BIOS_DEBUG, "CPU model: %s\n", program_string);
+
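+/* Note on the Family 15h branch above: the brand string is fetched through
+ * the D18F5x194/D18F5x198 indirect register pair, four bytes per read, so
+ * twelve reads fill the entire 48-byte (NAME_STRING_MAXLEN) buffer. The
+ * trailing-space scan then trims any 0x20 padding at the end of the string
+ * before the terminator is written. */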
+diff --git a/src/cpu/amd/family_10h-family_15h/update_microcode.c b/src/cpu/amd/family_10h-family_15h/update_microcode.c
+index 51aca35..3b2f5dd 100644
+--- a/src/cpu/amd/family_10h-family_15h/update_microcode.c
++++ b/src/cpu/amd/family_10h-family_15h/update_microcode.c
+@@ -28,6 +28,7 @@ struct id_mapping {
+
+ static u16 get_equivalent_processor_rev_id(u32 orig_id) {
+ static const struct id_mapping id_mapping_table[] = {
++ /* Family 10h */
+ { 0x100f00, 0x1000 },
+ { 0x100f01, 0x1000 },
+ { 0x100f02, 0x1000 },
+@@ -42,8 +43,13 @@ static u16 get_equivalent_processor_rev_id(u32 orig_id) {
+ { 0x100f62, 0x1062 }, /* DA-C2 */
+ { 0x100f63, 0x1043 }, /* DA-C3 */
+ { 0x100f81, 0x1081 }, /* HY-D1 */
++ { 0x100f91, 0x1081 }, /* HY-D1 */
+ { 0x100fa0, 0x10A0 }, /* PH-E0 */
+
++ /* Family 15h */
++ { 0x600f12, 0x6012 }, /* OR-B2 */
++ { 0x600f20, 0x6020 }, /* OR-C0 */
++
+ /* Array terminator */
+ { 0xffffff, 0x0000 },
+ };
+diff --git a/src/cpu/amd/model_fxx/init_cpus.c b/src/cpu/amd/model_fxx/init_cpus.c
+index 12d3a95..3960c03 100644
+--- a/src/cpu/amd/model_fxx/init_cpus.c
++++ b/src/cpu/amd/model_fxx/init_cpus.c
+@@ -190,7 +190,7 @@ void allow_all_aps_stop(u32 bsp_apicid)
+
+ static void STOP_CAR_AND_CPU(void)
+ {
+- disable_cache_as_ram(); // inline
++ disable_cache_as_ram(0); // inline
+ /* stop all cores except node0/core0 the bsp .... */
+ stop_this_cpu();
+ }
+diff --git a/src/cpu/amd/quadcore/quadcore.c b/src/cpu/amd/quadcore/quadcore.c
+index 9c21e94..8a9b5ed 100644
+--- a/src/cpu/amd/quadcore/quadcore.c
++++ b/src/cpu/amd/quadcore/quadcore.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -26,16 +27,41 @@
+
+ #include "cpu/amd/quadcore/quadcore_id.c"
+
++/* get_boot_apic_id and wait_cpu_state located in init_cpus.c */
++uint32_t get_boot_apic_id(uint8_t node, uint32_t core);
++uint32_t wait_cpu_state(uint32_t apicid, uint32_t state, uint32_t state2);
++
++static inline uint8_t is_fam15h(void)
++{
++ uint8_t fam15h = 0;
++ uint32_t family;
++
++ family = cpuid_eax(0x80000001);
++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
++
++ if (family >= 0x6f)
++ /* Family 15h or later */
++ fam15h = 1;
++
++ return fam15h;
++}
++
+ static u32 get_core_num_in_bsp(u32 nodeid)
+ {
+ u32 dword;
+- dword = pci_read_config32(NODE_PCI(nodeid, 3), 0xe8);
+- dword >>= 12;
+- /* Bit 15 is CmpCap[2] since Revision D. */
+- if ((cpuid_ecx(0x80000008) & 0xff) > 3)
+- dword = ((dword & 8) >> 1) | (dword & 3);
+- else
+- dword &= 3;
++ if (is_fam15h()) {
++ /* Family 15h moved CmpCap to F5x84 [7:0] */
++ dword = pci_read_config32(NODE_PCI(nodeid, 5), 0x84);
++ dword &= 0xff;
++ } else {
++ dword = pci_read_config32(NODE_PCI(nodeid, 3), 0xe8);
++ dword >>= 12;
++ /* Bit 15 is CmpCap[2] since Revision D. */
++ if ((cpuid_ecx(0x80000008) & 0xff) > 3)
++ dword = ((dword & 8) >> 1) | (dword & 3);
++ else
++ dword &= 3;
++ }
+ return dword;
+ }
+
+@@ -50,28 +76,68 @@ static u8 set_apicid_cpuid_lo(void)
+ return 1;
+ }
+
+-static void real_start_other_core(u32 nodeid, u32 cores)
++static void real_start_other_core(uint32_t nodeid, uint32_t cores)
+ {
+- u32 dword, i;
++ ssize_t i;
++ uint32_t dword;
+
+ printk(BIOS_DEBUG, "Start other core - nodeid: %02x cores: %02x\n", nodeid, cores);
+
+ /* set PCI_DEV(0, 0x18+nodeid, 3), 0x44 bit 27 to redirect all MC4
+ accesses and error logging to core0 */
+ dword = pci_read_config32(NODE_PCI(nodeid, 3), 0x44);
+- dword |= 1 << 27; // NbMcaToMstCpuEn bit
++ dword |= 1 << 30; /* SyncFloodOnDramAdrParErr=1 */
++ dword |= 1 << 27; /* NbMcaToMstCpuEn=1 */
++ dword |= 1 << 21; /* SyncFloodOnAnyUcErr=1 */
++ dword |= 1 << 20; /* SyncFloodOnWDT=1 */
++ dword |= 1 << 2; /* SyncFloodOnDramUcEcc=1 */
+ pci_write_config32(NODE_PCI(nodeid, 3), 0x44, dword);
+- // set PCI_DEV(0, 0x18+nodeid, 0), 0x68 bit 5 to start core1
+- dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x68);
+- dword |= 1 << 5;
+- pci_write_config32(NODE_PCI(nodeid, 0), 0x68, dword);
+-
+- if(cores > 1) {
+- dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x168);
+- for (i = 0; i < cores - 1; i++) {
+- dword |= 1 << i;
++ if (is_fam15h()) {
++ uint32_t core_activation_flags = 0;
++ uint32_t active_cores = 0;
++
++ /* Set PCI_DEV(0, 0x18+nodeid, 0), 0x1dc bits 7:1 to start cores */
++ dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x1dc);
++ for (i = 1; i < cores + 1; i++) {
++ core_activation_flags |= 1 << i;
++ }
++
++ /* Start the first core of each compute unit */
++ active_cores |= core_activation_flags & 0x55;
++ pci_write_config32(NODE_PCI(nodeid, 0), 0x1dc, dword | active_cores);
++
++ /* Each core shares a single set of MTRR registers with
++ * another core in the same compute unit, therefore, it
++ * is important that one core in each CU starts in advance
++ * of the other in order to avoid one core stomping all over
++ * the other core's settings.
++ */
++
++ /* Wait for the first core of each compute unit to start... */
++ uint32_t timeout;
++ for (i = 1; i < cores + 1; i++) {
++ if (!(i & 0x1)) {
++ uint32_t ap_apicid = get_boot_apic_id(nodeid, i);
++ timeout = wait_cpu_state(ap_apicid, F10_APSTATE_ASLEEP, F10_APSTATE_ASLEEP);
++ }
++ }
++
++ /* Start the second core of each compute unit */
++ active_cores |= core_activation_flags & 0xaa;
++ pci_write_config32(NODE_PCI(nodeid, 0), 0x1dc, dword | active_cores);
++ } else {
++ // set PCI_DEV(0, 0x18+nodeid, 0), 0x68 bit 5 to start core1
++ dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x68);
++ dword |= 1 << 5;
++ pci_write_config32(NODE_PCI(nodeid, 0), 0x68, dword);
++
++ if (cores > 1) {
++ dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x168);
++ for (i = 0; i < cores - 1; i++) {
++ dword |= 1 << i;
++ }
++ pci_write_config32(NODE_PCI(nodeid, 0), 0x168, dword);
+ }
+- pci_write_config32(NODE_PCI(nodeid, 0), 0x168, dword);
+ }
+ }
+
+@@ -91,10 +157,9 @@ static void start_other_cores(void)
+
+ for (nodeid = 0; nodeid < nodes; nodeid++) {
+ u32 cores = get_core_num_in_bsp(nodeid);
+- printk(BIOS_DEBUG, "init node: %02x cores: %02x \n", nodeid, cores);
++ printk(BIOS_DEBUG, "init node: %02x cores: %02x pass 1 \n", nodeid, cores);
+ if (cores > 0) {
+ real_start_other_core(nodeid, cores);
+ }
+ }
+-
+ }
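+/* Worked example of the two-pass core activation above, assuming an
+ * eight-core node (cores == 7): the loop sets core_activation_flags to
+ * 0xfe (bits 1-7; core 0 is already running). The first write ORs in
+ * 0xfe & 0x55 = 0x54, starting cores 2, 4 and 6 - the first core of each
+ * remaining compute unit - and only after wait_cpu_state() reports them
+ * asleep does the second write OR in 0xfe & 0xaa = 0xaa to start the
+ * shared-MTRR siblings 1, 3, 5 and 7. */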
+diff --git a/src/cpu/amd/quadcore/quadcore_id.c b/src/cpu/amd/quadcore/quadcore_id.c
+index c5921de..c0537b3 100644
+--- a/src/cpu/amd/quadcore/quadcore_id.c
++++ b/src/cpu/amd/quadcore/quadcore_id.c
+@@ -43,9 +43,12 @@ struct node_core_id get_node_core_id(u32 nb_cfg_54)
+ {
+ struct node_core_id id;
+ uint8_t apicid;
++ uint8_t fam15h = 0;
+ uint8_t rev_gte_d = 0;
+ uint8_t dual_node = 0;
+ uint32_t f3xe8;
++ uint32_t family;
++ uint32_t model;
+
+ #ifdef __PRE_RAM__
+ f3xe8 = pci_read_config32(NODE_PCI(0, 3), 0xe8);
+@@ -53,7 +56,17 @@ struct node_core_id get_node_core_id(u32 nb_cfg_54)
+ f3xe8 = pci_read_config32(get_node_pci(0, 3), 0xe8);
+ #endif
+
+- if (cpuid_eax(0x80000001) >= 0x8)
++ family = model = cpuid_eax(0x80000001);
++ model = ((model & 0xf0000) >> 12) | ((model & 0xf0) >> 4);
++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
++
++ if (family >= 0x6f) {
++ /* Family 15h or later */
++ fam15h = 1;
++ nb_cfg_54 = 1;
++ }
++
++ if ((model >= 0x8) || fam15h)
+ /* Revision D or later */
+ rev_gte_d = 1;
+
+@@ -67,7 +80,13 @@ struct node_core_id get_node_core_id(u32 nb_cfg_54)
+ */
+ apicid = (cpuid_ebx(1) >> 24) & 0xff;
+ if( nb_cfg_54) {
+- if (rev_gte_d && dual_node) {
++ if (fam15h && dual_node) {
++ id.coreid = apicid & 0x1f;
++ id.nodeid = (apicid & 0x60) >> 5;
++ } else if (fam15h && !dual_node) {
++ id.coreid = apicid & 0xf;
++ id.nodeid = (apicid & 0x70) >> 4;
++ } else if (rev_gte_d && dual_node) {
+ id.coreid = apicid & 0xf;
+ id.nodeid = (apicid & 0x30) >> 4;
+ } else if (rev_gte_d && !dual_node) {
+@@ -90,7 +109,25 @@ struct node_core_id get_node_core_id(u32 nb_cfg_54)
+ }
+ }
+
+- if (rev_gte_d && dual_node) {
++ if (fam15h && dual_node) {
++ /* Coreboot expects each separate processor die to be on a different nodeid.
++ * Since the code above returns nodeid 0 even on internal node 1 some fixup is needed...
++ */
++ uint32_t f5x84;
++ uint8_t core_count;
++
++#ifdef __PRE_RAM__
++ f5x84 = pci_read_config32(NODE_PCI(0, 5), 0x84);
++#else
++ f5x84 = pci_read_config32(get_node_pci(0, 5), 0x84);
++#endif
++ core_count = (f5x84 & 0xff) + 1;
++ id.nodeid = id.nodeid * 2;
++ if (id.coreid >= core_count) {
++ id.nodeid += 1;
++ id.coreid = id.coreid - core_count;
++ }
++ } else if (rev_gte_d && dual_node) {
+ /* Coreboot expects each separate processor die to be on a different nodeid.
+ * Since the code above returns nodeid 0 even on internal node 1 some fixup is needed...
+ */
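+/* Worked example of the Family 15h dual-node decode above, assuming an
+ * Opteron 6380-style package with eight cores per internal node
+ * (F5x84[7:0] = 7, so core_count = 8): APIC ID 0x0b first decodes to
+ * nodeid 0, coreid 11; the fixup then doubles the nodeid and, since
+ * coreid >= core_count, yields nodeid 1, coreid 3 - i.e. the fourth core
+ * on the second internal die. */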
+diff --git a/src/include/cpu/amd/model_10xxx_msr.h b/src/include/cpu/amd/model_10xxx_msr.h
+index 6c7dece..7d78e2d 100644
+--- a/src/include/cpu/amd/model_10xxx_msr.h
++++ b/src/include/cpu/amd/model_10xxx_msr.h
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -32,7 +33,13 @@
+ #define IC_CFG_MSR 0xC0011021
+ #define DC_CFG_MSR 0xC0011022
+ #define BU_CFG_MSR 0xC0011023
++#define FP_CFG_MSR 0xC0011028
++#define DE_CFG_MSR 0xC0011029
+ #define BU_CFG2_MSR 0xC001102A
++#define BU_CFG3_MSR 0xC001102B
++#define EX_CFG_MSR 0xC001102C
++#define LS_CFG2_MSR 0xC001102D
++#define IBS_OP_DATA3_MSR 0xC0011037
+
+ #define CPU_ID_FEATURES_MSR 0xC0011004
+ #define CPU_ID_HYPER_EXT_FEATURES 0xC001100d
+diff --git a/src/mainboard/advansus/a785e-i/romstage.c b/src/mainboard/advansus/a785e-i/romstage.c
+index 4c2b38a..ab717fd 100644
+--- a/src/mainboard/advansus/a785e-i/romstage.c
++++ b/src/mainboard/advansus/a785e-i/romstage.c
+@@ -131,7 +131,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/amd/bimini_fam10/romstage.c b/src/mainboard/amd/bimini_fam10/romstage.c
+index e2bd351..5e2cf82 100644
+--- a/src/mainboard/amd/bimini_fam10/romstage.c
++++ b/src/mainboard/amd/bimini_fam10/romstage.c
+@@ -123,7 +123,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/amd/mahogany_fam10/romstage.c b/src/mainboard/amd/mahogany_fam10/romstage.c
+index 74bc9d5..025a8bb 100644
+--- a/src/mainboard/amd/mahogany_fam10/romstage.c
++++ b/src/mainboard/amd/mahogany_fam10/romstage.c
+@@ -125,7 +125,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/amd/serengeti_cheetah_fam10/romstage.c b/src/mainboard/amd/serengeti_cheetah_fam10/romstage.c
+index 20d46e6..5063439 100644
+--- a/src/mainboard/amd/serengeti_cheetah_fam10/romstage.c
++++ b/src/mainboard/amd/serengeti_cheetah_fam10/romstage.c
+@@ -231,7 +231,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/amd/tilapia_fam10/romstage.c b/src/mainboard/amd/tilapia_fam10/romstage.c
+index 89100b1..e37bc08 100644
+--- a/src/mainboard/amd/tilapia_fam10/romstage.c
++++ b/src/mainboard/amd/tilapia_fam10/romstage.c
+@@ -125,7 +125,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/asus/kfsn4-dre/romstage.c b/src/mainboard/asus/kfsn4-dre/romstage.c
+index 5d1f5a6..dd5c7dc 100644
+--- a/src/mainboard/asus/kfsn4-dre/romstage.c
++++ b/src/mainboard/asus/kfsn4-dre/romstage.c
+@@ -245,7 +245,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/asus/kgpe-d16/romstage.c b/src/mainboard/asus/kgpe-d16/romstage.c
+index a3f3310..a58fd0f 100644
+--- a/src/mainboard/asus/kgpe-d16/romstage.c
++++ b/src/mainboard/asus/kgpe-d16/romstage.c
+@@ -354,7 +354,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+@@ -512,4 +512,4 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+ BOOL AMD_CB_ManualBUIDSwapList (u8 node, u8 link, const u8 **List)
+ {
+ return 0;
+-}
+\ No newline at end of file
++}
+diff --git a/src/mainboard/asus/m4a78-em/romstage.c b/src/mainboard/asus/m4a78-em/romstage.c
+index 82f30d9..82b96bf 100644
+--- a/src/mainboard/asus/m4a78-em/romstage.c
++++ b/src/mainboard/asus/m4a78-em/romstage.c
+@@ -127,7 +127,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/asus/m4a785-m/romstage.c b/src/mainboard/asus/m4a785-m/romstage.c
+index 780bf81..30975fa 100644
+--- a/src/mainboard/asus/m4a785-m/romstage.c
++++ b/src/mainboard/asus/m4a785-m/romstage.c
+@@ -127,7 +127,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/asus/m5a88-v/romstage.c b/src/mainboard/asus/m5a88-v/romstage.c
+index 38761a6..4edaba2 100644
+--- a/src/mainboard/asus/m5a88-v/romstage.c
++++ b/src/mainboard/asus/m5a88-v/romstage.c
+@@ -128,7 +128,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/avalue/eax-785e/romstage.c b/src/mainboard/avalue/eax-785e/romstage.c
+index 764a5c6..447012b 100644
+--- a/src/mainboard/avalue/eax-785e/romstage.c
++++ b/src/mainboard/avalue/eax-785e/romstage.c
+@@ -132,7 +132,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/gigabyte/ma785gm/romstage.c b/src/mainboard/gigabyte/ma785gm/romstage.c
+index db4e449..444e59d 100644
+--- a/src/mainboard/gigabyte/ma785gm/romstage.c
++++ b/src/mainboard/gigabyte/ma785gm/romstage.c
+@@ -122,7 +122,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/gigabyte/ma785gmt/romstage.c b/src/mainboard/gigabyte/ma785gmt/romstage.c
+index 4ce7c58..705d7c5 100644
+--- a/src/mainboard/gigabyte/ma785gmt/romstage.c
++++ b/src/mainboard/gigabyte/ma785gmt/romstage.c
+@@ -122,7 +122,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/gigabyte/ma78gm/romstage.c b/src/mainboard/gigabyte/ma78gm/romstage.c
+index d2a0b95..5d21801 100644
+--- a/src/mainboard/gigabyte/ma78gm/romstage.c
++++ b/src/mainboard/gigabyte/ma78gm/romstage.c
+@@ -125,7 +125,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/hp/dl165_g6_fam10/romstage.c b/src/mainboard/hp/dl165_g6_fam10/romstage.c
+index 97e60d5..26c0bb9 100644
+--- a/src/mainboard/hp/dl165_g6_fam10/romstage.c
++++ b/src/mainboard/hp/dl165_g6_fam10/romstage.c
+@@ -137,7 +137,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/iei/kino-780am2-fam10/romstage.c b/src/mainboard/iei/kino-780am2-fam10/romstage.c
+index edbae3a..321eea6 100644
+--- a/src/mainboard/iei/kino-780am2-fam10/romstage.c
++++ b/src/mainboard/iei/kino-780am2-fam10/romstage.c
+@@ -125,7 +125,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/jetway/pa78vm5/romstage.c b/src/mainboard/jetway/pa78vm5/romstage.c
+index 16bb089..93dd2ce 100644
+--- a/src/mainboard/jetway/pa78vm5/romstage.c
++++ b/src/mainboard/jetway/pa78vm5/romstage.c
+@@ -130,7 +130,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/msi/ms9652_fam10/romstage.c b/src/mainboard/msi/ms9652_fam10/romstage.c
+index 4ea3306..5da971f 100644
+--- a/src/mainboard/msi/ms9652_fam10/romstage.c
++++ b/src/mainboard/msi/ms9652_fam10/romstage.c
+@@ -150,7 +150,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/supermicro/h8dmr_fam10/romstage.c b/src/mainboard/supermicro/h8dmr_fam10/romstage.c
+index c224dbc..1425546 100644
+--- a/src/mainboard/supermicro/h8dmr_fam10/romstage.c
++++ b/src/mainboard/supermicro/h8dmr_fam10/romstage.c
+@@ -146,7 +146,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/supermicro/h8qme_fam10/romstage.c b/src/mainboard/supermicro/h8qme_fam10/romstage.c
+index 0f9445b..4721eba 100644
+--- a/src/mainboard/supermicro/h8qme_fam10/romstage.c
++++ b/src/mainboard/supermicro/h8qme_fam10/romstage.c
+@@ -214,7 +214,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/mainboard/supermicro/h8scm_fam10/romstage.c b/src/mainboard/supermicro/h8scm_fam10/romstage.c
+index 4ea14fe..858aca0 100644
+--- a/src/mainboard/supermicro/h8scm_fam10/romstage.c
++++ b/src/mainboard/supermicro/h8scm_fam10/romstage.c
+@@ -136,7 +136,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ /* TODO: The Kernel must support 12 processor, otherwise the interrupt
+diff --git a/src/mainboard/tyan/s2912_fam10/romstage.c b/src/mainboard/tyan/s2912_fam10/romstage.c
+index 0030619..cdf51b1 100644
+--- a/src/mainboard/tyan/s2912_fam10/romstage.c
++++ b/src/mainboard/tyan/s2912_fam10/romstage.c
+@@ -149,7 +149,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx)
+
+ post_code(0x33);
+
+- cpuSetAMDMSR();
++ cpuSetAMDMSR(0);
+ post_code(0x34);
+
+ amd_ht_init(sysinfo);
+diff --git a/src/northbridge/amd/amdfam10/Kconfig b/src/northbridge/amd/amdfam10/Kconfig
+index ada5b9f..cb0d109 100644
+--- a/src/northbridge/amd/amdfam10/Kconfig
++++ b/src/northbridge/amd/amdfam10/Kconfig
+@@ -96,7 +96,7 @@ endif
+ if HAVE_ACPI_RESUME
+ config S3_DATA_SIZE
+ int
+- default 16384
++ default 32768
+ endif
+
+ if DIMM_DDR2
+diff --git a/src/northbridge/amd/amdfam10/Makefile.inc b/src/northbridge/amd/amdfam10/Makefile.inc
+index b4097b4..4098dce 100644
+--- a/src/northbridge/amd/amdfam10/Makefile.inc
++++ b/src/northbridge/amd/amdfam10/Makefile.inc
+@@ -2,6 +2,8 @@ ifeq ($(CONFIG_NORTHBRIDGE_AMD_AMDFAM10),y)
+
+ ramstage-y += northbridge.c
+ ramstage-y += misc_control.c
++ramstage-y += link_control.c
++ramstage-y += nb_control.c
+ romstage-y += amdfam10_util.c
+ ramstage-y += amdfam10_util.c
+
+diff --git a/src/northbridge/amd/amdfam10/amdfam10.h b/src/northbridge/amd/amdfam10/amdfam10.h
+index a1e08a0..b724394 100644
+--- a/src/northbridge/amd/amdfam10/amdfam10.h
++++ b/src/northbridge/amd/amdfam10/amdfam10.h
+@@ -962,9 +962,12 @@ that are corresponding to 0x01, 0x02, 0x03, 0x05, 0x06, 0x07
+
+ #define LAPIC_MSG_REG 0x380
+ #define F10_APSTATE_STARTED 0x13 // start of AP execution
+-#define F10_APSTATE_STOPPED 0x14 // allow AP to stop
++#define F10_APSTATE_ASLEEP 0x14 // AP sleeping
++#define F10_APSTATE_STOPPED 0x15 // allow AP to stop
+ #define F10_APSTATE_RESET 0x01 // waiting for warm reset
+
++#define MAX_CORES_SUPPORTED 128
++
+ #include "nums.h"
+
+ #ifdef __PRE_RAM__
+@@ -1038,7 +1041,6 @@ struct sys_info {
+
+ struct MCTStatStruc MCTstat;
+ struct DCTStatStruc DCTstatA[NODE_NUMS];
+-
+ } __attribute__((packed));
+
+ #ifdef __PRE_RAM__
+diff --git a/src/northbridge/amd/amdfam10/amdfam10_util.c b/src/northbridge/amd/amdfam10/amdfam10_util.c
+index 423bb73..a4045bdf 100644
+--- a/src/northbridge/amd/amdfam10/amdfam10_util.c
++++ b/src/northbridge/amd/amdfam10/amdfam10_util.c
+@@ -34,14 +34,14 @@ u32 Get_NB32(u32 dev, u32 reg)
+ }
+ #endif
+
+-u32 mctGetLogicalCPUID(u32 Node)
++uint64_t mctGetLogicalCPUID(u32 Node)
+ {
+ /* Converts the CPUID to a logical ID MASK that is used to check
+ CPU version support versions */
+ u32 dev;
+ u32 val, valx;
+ u32 family, model, stepping;
+- u32 ret;
++ uint64_t ret;
+
+ if (Node == 0xFF) { /* current node */
+ val = cpuid_eax(0x80000001);
+@@ -100,9 +100,16 @@ u32 mctGetLogicalCPUID(u32 Node)
+ case 0x100a0:
+ ret = AMD_PH_E0;
+ break;
++ case 0x15012:
++ case 0x1501f:
++ ret = AMD_OR_B2;
++ break;
++ case 0x15020:
++ ret = AMD_OR_C0;
++ break;
+ default:
+ /* FIXME: maybe we should die() here. */
+- printk(BIOS_ERR, "FIXME! CPU Version unknown or not supported! \n");
++ printk(BIOS_ERR, "FIXME! CPU Version unknown or not supported! %08x\n", valx);
+ ret = 0;
+ }
+
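+/* Note on the signature change above: mctGetLogicalCPUID() now returns
+ * uint64_t, and the 'revision' locals in cpuSetAMDMSR()/cpuSetAMDPCI() are
+ * widened to match, presumably so the revision masks have room past bit 31
+ * once the Family 15h IDs (AMD_OR_B2, AMD_OR_C0) are added alongside the
+ * existing Family 10h definitions. */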
+diff --git a/src/northbridge/amd/amdfam10/link_control.c b/src/northbridge/amd/amdfam10/link_control.c
+new file mode 100644
+index 0000000..1091ef4
+--- /dev/null
++++ b/src/northbridge/amd/amdfam10/link_control.c
+@@ -0,0 +1,86 @@
++/*
++ * This file is part of the coreboot project.
++ *
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/* Configure various power control registers, including processor
++ * boost support.
++ */
++
++#include <console/console.h>
++#include <device/device.h>
++#include <device/pci.h>
++#include <device/pci_ids.h>
++#include <device/pci_ops.h>
++#include <pc80/mc146818rtc.h>
++#include <lib.h>
++#include <cpu/amd/model_10xxx_rev.h>
++
++#include "amdfam10.h"
++
++static inline uint8_t is_fam15h(void)
++{
++ uint8_t fam15h = 0;
++ uint32_t family;
++
++ family = cpuid_eax(0x80000001);
++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
++
++ if (family >= 0x6f)
++ /* Family 15h or later */
++ fam15h = 1;
++
++ return fam15h;
++}
++
++static void link_control_init(struct device *dev)
++{
++ uint32_t dword;
++
++ printk(BIOS_DEBUG, "NB: Function 4 Link Control.. ");
++
++ if (is_fam15h()) {
++ /* Enable APM */
++ dword = pci_read_config32(dev, 0x15c);
++ dword |= (0x1 << 7); /* ApmMasterEn = 1 */
++ pci_write_config32(dev, 0x15c, dword);
++ }
++
++ printk(BIOS_DEBUG, "done.\n");
++}
++
++
++static struct device_operations mcf4_ops = {
++ .read_resources = pci_dev_read_resources,
++ .set_resources = pci_dev_set_resources,
++ .enable_resources = pci_dev_enable_resources,
++ .init = link_control_init,
++ .scan_bus = 0,
++ .ops_pci = 0,
++};
++
++static const struct pci_driver mcf4_driver_fam10 __pci_driver = {
++ .ops = &mcf4_ops,
++ .vendor = PCI_VENDOR_ID_AMD,
++ .device = 0x1204,
++};
++
++static const struct pci_driver mcf4_driver_fam15 __pci_driver = {
++ .ops = &mcf4_ops,
++ .vendor = PCI_VENDOR_ID_AMD,
++ .device = 0x1604,
++};
+\ No newline at end of file
+diff --git a/src/northbridge/amd/amdfam10/misc_control.c b/src/northbridge/amd/amdfam10/misc_control.c
+index 90a4db1..8777e8f 100644
+--- a/src/northbridge/amd/amdfam10/misc_control.c
++++ b/src/northbridge/amd/amdfam10/misc_control.c
+@@ -4,6 +4,7 @@
+ * Copyright (C) 2003 by Eric Biederman
+ * Copyright (C) Stefan Reinauer
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -152,3 +153,9 @@ static const struct pci_driver mcf3_driver __pci_driver = {
+ .vendor = PCI_VENDOR_ID_AMD,
+ .device = 0x1203,
+ };
++
++static const struct pci_driver mcf3_driver_fam15 __pci_driver = {
++ .ops = &mcf3_ops,
++ .vendor = PCI_VENDOR_ID_AMD,
++ .device = 0x1603,
++};
+diff --git a/src/northbridge/amd/amdfam10/nb_control.c b/src/northbridge/amd/amdfam10/nb_control.c
+new file mode 100644
+index 0000000..f95b6f8
+--- /dev/null
++++ b/src/northbridge/amd/amdfam10/nb_control.c
+@@ -0,0 +1,85 @@
++/*
++ * This file is part of the coreboot project.
++ *
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/* Configure various power control registers, including processor boost
++ * and TDP monitoring support.
++ */
++
++#include <console/console.h>
++#include <device/device.h>
++#include <device/pci.h>
++#include <device/pci_ids.h>
++#include <device/pci_ops.h>
++#include <pc80/mc146818rtc.h>
++#include <lib.h>
++#include <cpu/amd/model_10xxx_rev.h>
++
++#include "amdfam10.h"
++
++static void nb_control_init(struct device *dev)
++{
++ uint32_t dword;
++ uint32_t f5x80;
++ uint8_t cu_enabled;
++ uint8_t compute_unit_count = 0;
++
++ printk(BIOS_DEBUG, "NB: Function 5 Northbridge Control.. ");
++
++ /* Determine the number of active compute units on this node */
++ f5x80 = pci_read_config32(dev, 0x80);
++ cu_enabled = f5x80 & 0xf;
++ if (cu_enabled == 0x1)
++ compute_unit_count = 1;
++ if (cu_enabled == 0x3)
++ compute_unit_count = 2;
++ if (cu_enabled == 0x7)
++ compute_unit_count = 3;
++ if (cu_enabled == 0xf)
++ compute_unit_count = 4;
++
++ /* Configure Processor TDP Running Average */
++ dword = pci_read_config32(dev, 0xe0);
++ dword &= ~0xf; /* RunAvgRange = 0x9 */
++ dword |= 0x9;
++ pci_write_config32(dev, 0xe0, dword);
++
++ /* Configure northbridge P-states */
++ dword = pci_read_config32(dev, 0xe0);
++ dword &= ~(0x7 << 9); /* NbPstateThreshold = compute_unit_count */
++ dword |= (compute_unit_count & 0x7) << 9;
++ pci_write_config32(dev, 0xe0, dword);
++
++ printk(BIOS_DEBUG, "done.\n");
++}
++
++
++static struct device_operations mcf5_ops = {
++ .read_resources = pci_dev_read_resources,
++ .set_resources = pci_dev_set_resources,
++ .enable_resources = pci_dev_enable_resources,
++ .init = nb_control_init,
++ .scan_bus = 0,
++ .ops_pci = 0,
++};
++
++static const struct pci_driver mcf5_driver_fam15 __pci_driver = {
++ .ops = &mcf5_ops,
++ .vendor = PCI_VENDOR_ID_AMD,
++ .device = 0x1605,
++};
+\ No newline at end of file
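+/* The enabled-unit decode in nb_control_init() above (and the matching
+ * chain in cpuSetAMDMSR()) maps the contiguous enable patterns of
+ * F5x80[3:0] to a compute unit count. For those patterns it amounts to a
+ * population count of the low nibble; a minimal equivalent sketch:
+ *
+ *	static uint8_t count_compute_units(uint32_t f5x80)
+ *	{
+ *		uint8_t mask = f5x80 & 0xf;
+ *		uint8_t count = 0;
+ *		while (mask) {
+ *			count += mask & 0x1;
+ *			mask >>= 1;
+ *		}
+ *		return count;
+ *	}
+ *
+ * giving 0x1 -> 1, 0x3 -> 2, 0x7 -> 3 and 0xf -> 4, as coded above. */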
+diff --git a/src/northbridge/amd/amdfam10/northbridge.c b/src/northbridge/amd/amdfam10/northbridge.c
+index fb3b2f7..fcf85a7 100644
+--- a/src/northbridge/amd/amdfam10/northbridge.c
++++ b/src/northbridge/amd/amdfam10/northbridge.c
+@@ -81,6 +81,21 @@ device_t get_node_pci(u32 nodeid, u32 fn)
+ #endif
+ }
+
++static inline uint8_t is_fam15h(void)
++{
++ uint8_t fam15h = 0;
++ uint32_t family;
++
++ family = cpuid_eax(0x80000001);
++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
++
++ if (family >= 0x6f)
++ /* Family 15h or later */
++ fam15h = 1;
++
++ return fam15h;
++}
++
+ static void get_fx_devs(void)
+ {
+ int i;
+@@ -202,7 +217,7 @@ static void amd_g34_fixup(struct bus *link, device_t dev)
+ /* Revision D or later */
+ rev_gte_d = 1;
+
+- if (rev_gte_d) {
++ if (rev_gte_d || is_fam15h()) {
+ f3xe8 = pci_read_config32(get_node_pci(0, 3), 0xe8);
+
+ /* Check for dual node capability */
+@@ -215,6 +230,15 @@ static void amd_g34_fixup(struct bus *link, device_t dev)
+ */
+ f3xe8 = pci_read_config32(get_node_pci(nodeid, 3), 0xe8);
+ uint8_t internal_node_number = ((f3xe8 & 0xc0000000) >> 30);
++ uint8_t defective_link_number_1;
++ uint8_t defective_link_number_2;
++ if (is_fam15h()) {
++ defective_link_number_1 = 4; /* Link 0 Sublink 1 */
++ defective_link_number_2 = 7; /* Link 3 Sublink 1 */
++ } else {
++ defective_link_number_1 = 6; /* Link 2 Sublink 1 */
++ defective_link_number_2 = 5; /* Link 1 Sublink 1 */
++ }
+ if (internal_node_number == 0) {
+ /* Node 0 */
+ if (link->link_num == 6) /* Link 2 Sublink 1 */
+@@ -314,6 +338,46 @@ static void amdfam10_scan_chains(device_t dev)
+ {
+ struct bus *link;
+
++#if CONFIG_CPU_AMD_SOCKET_G34_NON_AGESA
++ if (is_fam15h()) {
++ uint8_t current_link_number = 0;
++
++ for (link = dev->link_list; link; link = link->next) {
++ /* The following links have changed position in Fam15h G34 processors:
++ * Fam10 Fam15
++ * Node 0
++ * L3 --> L1
++ * L0 --> L3
++ * L1 --> L2
++ * L2 --> L0
++ * Node 1
++ * L0 --> L0
++ * L1 --> L3
++ * L2 --> L1
++ * L3 --> L2
++ */
++ if (link->link_num == 0)
++ link->link_num = 3;
++ else if (link->link_num == 1)
++ link->link_num = 2;
++ else if (link->link_num == 2)
++ link->link_num = 0;
++ else if (link->link_num == 3)
++ link->link_num = 1;
++ else if (link->link_num == 5)
++ link->link_num = 7;
++ else if (link->link_num == 6)
++ link->link_num = 5;
++ else if (link->link_num == 7)
++ link->link_num = 6;
++
++ current_link_number++;
++ if (current_link_number > 3)
++ current_link_number = 0;
++ }
++ }
++#endif
++
+ /* Do sb ht chain at first, in case s2885 put sb chain (8131/8111) on link2, but put 8151 on link0 */
+ trim_ht_chain(dev);
+
+@@ -620,13 +684,21 @@ static const struct pci_driver mcf0_driver __pci_driver = {
+ .device = 0x1200,
+ };
+
++
+ static void amdfam10_nb_init(void *chip_info)
+ {
+ relocate_sb_ht_chain();
+ }
+
++static const struct pci_driver mcf0_driver_fam15 __pci_driver = {
++ .ops = &northbridge_operations,
++ .vendor = PCI_VENDOR_ID_AMD,
++ .device = 0x1600,
++};
++
++
+ struct chip_operations northbridge_amd_amdfam10_ops = {
+- CHIP_NAME("AMD FAM10 Northbridge")
++ CHIP_NAME("AMD Family 10h/15h Northbridge")
+ .enable_dev = 0,
+ .init = amdfam10_nb_init,
+ };
+@@ -950,38 +1022,61 @@ static int amdfam10_get_smbios_data16(int* count, int handle, unsigned long *cur
+
+ static uint16_t amdmct_mct_speed_enum_to_mhz(uint8_t speed)
+ {
+- if (IS_ENABLED(CONFIG_DIMM_DDR2)) {
+- switch (speed) {
+- case 1:
+- return 200;
+- case 2:
+- return 266;
+- case 3:
+- return 333;
+- case 4:
+- return 400;
+- case 5:
+- return 533;
+- default:
+- return 0;
+- }
+- } else if (IS_ENABLED(CONFIG_DIMM_DDR3)) {
+- switch (speed) {
+- case 3:
+- return 333;
+- case 4:
+- return 400;
+- case 5:
+- return 533;
+- case 6:
+- return 667;
+- case 7:
+- return 800;
+- default:
+- return 0;
++ if (is_fam15h()) {
++ if (IS_ENABLED(CONFIG_DIMM_DDR3)) {
++ switch (speed) {
++ case 0x4:
++ return 333;
++ case 0x6:
++ return 400;
++ case 0xa:
++ return 533;
++ case 0xe:
++ return 667;
++ case 0x12:
++ return 800;
++ case 0x16:
++ return 933;
++ default:
++ return 0;
++ }
++ } else {
++ return 0;
+ }
+ } else {
+- return 0;
++ if (IS_ENABLED(CONFIG_DIMM_DDR2)) {
++ switch (speed) {
++ case 1:
++ return 200;
++ case 2:
++ return 266;
++ case 3:
++ return 333;
++ case 4:
++ return 400;
++ case 5:
++ return 533;
++ default:
++ return 0;
++ }
++ } else if (IS_ENABLED(CONFIG_DIMM_DDR3)) {
++ switch (speed) {
++ case 3:
++ return 333;
++ case 4:
++ return 400;
++ case 5:
++ return 533;
++ case 6:
++ return 667;
++ case 7:
++ return 800;
++ default:
++ return 0;
++ }
++ } else {
++ return 0;
++ }
+ }
+ }
+
+@@ -1076,6 +1171,8 @@ static int amdfam10_get_smbios_data17(int* count, int handle, int parent_handle,
+ #if IS_ENABLED(CONFIG_DIMM_DDR3)
+ /* Find the maximum and minimum supported voltages */
+ uint8_t supported_voltages = mem_info->dct_stat[node].DimmSupportedVoltages[slot];
++ uint8_t configured_voltage = mem_info->dct_stat[node].DimmConfiguredVoltage[slot];
++
+ if (supported_voltages & 0x8)
+ t->minimum_voltage = 1150;
+ else if (supported_voltages & 0x4)
+@@ -1094,7 +1191,14 @@ static int amdfam10_get_smbios_data17(int* count, int handle, int parent_handle,
+ else if (supported_voltages & 0x8)
+ t->maximum_voltage = 1150;
+
+- t->configured_voltage = mem_info->dct_stat[node].DimmConfiguredVoltage[slot];
++ if (configured_voltage & 0x8)
++ t->configured_voltage = 1150;
++ else if (configured_voltage & 0x4)
++ t->configured_voltage = 1250;
++ else if (configured_voltage & 0x2)
++ t->configured_voltage = 1350;
++ else if (configured_voltage & 0x1)
++ t->configured_voltage = 1500;
+ #endif
+ }
+ t->memory_error_information_handle = 0xFFFE; /* no error information handle available */
+@@ -1233,12 +1337,14 @@ static void cpu_bus_scan(device_t dev)
+ #if CONFIG_CBB
+ device_t pci_domain;
+ #endif
++ int nvram = 0;
+ int i,j;
+ int nodes;
+ unsigned nb_cfg_54;
+ unsigned siblings;
+ int cores_found;
+ int disable_siblings;
++ uint8_t disable_cu_siblings = 0;
+ unsigned ApicIdCoreIdSize;
+
+ nb_cfg_54 = 0;
+@@ -1325,14 +1431,23 @@ static void cpu_bus_scan(device_t dev)
+ /* Always use the devicetree node with lapic_id 0 for BSP. */
+ remap_bsp_lapic(cpu_bus);
+
++ if (get_option(&nvram, "compute_unit_siblings") == CB_SUCCESS)
++ disable_cu_siblings = !!nvram;
++
++ if (disable_cu_siblings)
++ printk(BIOS_DEBUG, "Disabling siblings on each compute unit as requested\n");
++
+ for(i = 0; i < nodes; i++) {
+ device_t cdb_dev;
+ unsigned busn, devn;
+ struct bus *pbus;
+
++ uint8_t fam15h = 0;
+ uint8_t rev_gte_d = 0;
+ uint8_t dual_node = 0;
+ uint32_t f3xe8;
++ uint32_t family;
++ uint32_t model;
+
+ busn = CONFIG_CBB;
+ devn = CONFIG_CDB+i;
+@@ -1372,7 +1487,16 @@ static void cpu_bus_scan(device_t dev)
+
+ f3xe8 = pci_read_config32(get_node_pci(0, 3), 0xe8);
+
+- if (cpuid_eax(0x80000001) >= 0x8)
++ family = model = cpuid_eax(0x80000001);
++ model = ((model & 0xf0000) >> 12) | ((model & 0xf0) >> 4);
++
++ if (is_fam15h()) {
++ /* Family 15h or later */
++ fam15h = 1;
++ nb_cfg_54 = 1;
++ }
++
++ if ((model >= 0x8) || fam15h)
+ /* Revision D or later */
+ rev_gte_d = 1;
+
+@@ -1382,13 +1506,20 @@ static void cpu_bus_scan(device_t dev)
+ dual_node = 1;
+
+ cores_found = 0; // one core
+- cdb_dev = dev_find_slot(busn, PCI_DEVFN(devn, 3));
++ if (fam15h)
++ cdb_dev = dev_find_slot(busn, PCI_DEVFN(devn, 5));
++ else
++ cdb_dev = dev_find_slot(busn, PCI_DEVFN(devn, 3));
+ int enable_node = cdb_dev && cdb_dev->enabled;
+ if (enable_node) {
+- j = pci_read_config32(cdb_dev, 0xe8);
+- cores_found = (j >> 12) & 3; // dev is func 3
+- if (siblings > 3)
+- cores_found |= (j >> 13) & 4;
++ if (fam15h) {
++ cores_found = pci_read_config32(cdb_dev, 0x84) & 0xff;
++ } else {
++ j = pci_read_config32(cdb_dev, 0xe8);
++ cores_found = (j >> 12) & 3; // dev is func 3
++ if (siblings > 3)
++ cores_found |= (j >> 13) & 4;
++ }
+ printk(BIOS_DEBUG, " %s siblings=%d\n", dev_path(cdb_dev), cores_found);
+ }
+
+@@ -1408,15 +1539,24 @@ static void cpu_bus_scan(device_t dev)
+
+ if (dual_node) {
+ apic_id = 0;
+- if (nb_cfg_54) {
+- apic_id |= ((i >> 1) & 0x3) << 4; /* Node ID */
++ if (fam15h) {
++ apic_id |= ((i >> 1) & 0x3) << 5; /* Node ID */
+ apic_id |= ((i & 0x1) * (siblings + 1)) + j; /* Core ID */
+ } else {
+- apic_id |= i & 0x3; /* Node ID */
+- apic_id |= (((i & 0x1) * (siblings + 1)) + j) << 4; /* Core ID */
++ if (nb_cfg_54) {
++ apic_id |= ((i >> 1) & 0x3) << 4; /* Node ID */
++ apic_id |= ((i & 0x1) * (siblings + 1)) + j; /* Core ID */
++ } else {
++ apic_id |= i & 0x3; /* Node ID */
++ apic_id |= (((i & 0x1) * (siblings + 1)) + j) << 4; /* Core ID */
++ }
+ }
+ } else {
+- apic_id = i * (nb_cfg_54?(siblings+1):1) + j * (nb_cfg_54?1:64); // ?
++ if (fam15h) {
++ apic_id = (i * (siblings + 1)) + j;
++ } else {
++ apic_id = i * (nb_cfg_54?(siblings+1):1) + j * (nb_cfg_54?1:64); // ?
++ }
+ }
+
+ #if CONFIG_ENABLE_APIC_EXT_ID && (CONFIG_APIC_ID_OFFSET>0)
+@@ -1426,6 +1566,9 @@ static void cpu_bus_scan(device_t dev)
+ }
+ }
+ #endif
++ if (disable_cu_siblings && (j & 0x1))
++ continue;
++
+ device_t cpu = add_cpu_device(cpu_bus, apic_id, enable_node);
+ if (cpu)
+ amd_cpu_topology(cpu, i, j);
+@@ -1484,6 +1627,6 @@ static void root_complex_enable_dev(struct device *dev)
+ }
+
+ struct chip_operations northbridge_amd_amdfam10_root_complex_ops = {
+- CHIP_NAME("AMD FAM10 Root Complex")
++ CHIP_NAME("AMD Family 10h/15h Root Complex")
+ .enable_dev = root_complex_enable_dev,
+ };
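+/* Note on the Family 15h APIC ID construction in cpu_bus_scan() above:
+ * for dual-node (G34) parts the package number is placed in APIC ID bits
+ * 6:5 and the core number within the package in the low bits, which is
+ * the same layout the new get_node_core_id() decode in quadcore_id.c
+ * takes apart (apicid & 0x60 for the node, apicid & 0x1f for the core). */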
+diff --git a/src/northbridge/amd/amdfam10/raminit_amdmct.c b/src/northbridge/amd/amdfam10/raminit_amdmct.c
+index 5068e7a..cae228f 100644
+--- a/src/northbridge/amd/amdfam10/raminit_amdmct.c
++++ b/src/northbridge/amd/amdfam10/raminit_amdmct.c
+@@ -44,8 +44,120 @@ static void print_tf(const char *func, const char *strval)
+ #endif
+ }
+
+-static uint16_t mct_MaxLoadFreq(uint8_t count, uint8_t registered, uint16_t freq)
++static inline void fam15h_switch_dct(uint32_t dev, uint8_t dct)
+ {
++ uint32_t dword;
++
++ dword = Get_NB32(dev, 0x10c);
++ dword &= ~0x1;
++ dword |= (dct & 0x1);
++ Set_NB32(dev, 0x10c, dword);
++}
++
++static inline void fam15h_switch_nb_pstate_config_reg(uint32_t dev, uint8_t nb_pstate)
++{
++ uint32_t dword;
++
++ dword = Get_NB32(dev, 0x10c);
++ dword &= ~(0x3 << 4);
++ dword |= (nb_pstate & 0x3) << 4;
++ Set_NB32(dev, 0x10c, dword);
++}
++
++static inline uint32_t Get_NB32_DCT(uint32_t dev, uint8_t dct, uint32_t reg)
++{
++ if (is_fam15h()) {
++ /* Obtain address of function 0x1 */
++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12);
++ fam15h_switch_dct(dev_map, dct);
++ return Get_NB32(dev, reg);
++ } else {
++ return Get_NB32(dev, (0x100 * dct) + reg);
++ }
++}
++
++static inline void Set_NB32_DCT(uint32_t dev, uint8_t dct, uint32_t reg, uint32_t val)
++{
++ if (is_fam15h()) {
++ /* Obtain address of function 0x1 */
++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12);
++ fam15h_switch_dct(dev_map, dct);
++ Set_NB32(dev, reg, val);
++ } else {
++ Set_NB32(dev, (0x100 * dct) + reg, val);
++ }
++}
++
++static inline uint32_t Get_NB32_DCT_NBPstate(uint32_t dev, uint8_t dct, uint8_t nb_pstate, uint32_t reg)
++{
++ if (is_fam15h()) {
++ /* Obtain address of function 0x1 */
++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12);
++ fam15h_switch_dct(dev_map, dct);
++ fam15h_switch_nb_pstate_config_reg(dev_map, nb_pstate);
++ return Get_NB32(dev, reg);
++ } else {
++ return Get_NB32(dev, (0x100 * dct) + reg);
++ }
++}
++
++static inline void Set_NB32_DCT_NBPstate(uint32_t dev, uint8_t dct, uint8_t nb_pstate, uint32_t reg, uint32_t val)
++{
++ if (is_fam15h()) {
++ /* Obtain address of function 0x1 */
++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12);
++ fam15h_switch_dct(dev_map, dct);
++ fam15h_switch_nb_pstate_config_reg(dev_map, nb_pstate);
++ Set_NB32(dev, reg, val);
++ } else {
++ Set_NB32(dev, (0x100 * dct) + reg, val);
++ }
++}
++
++static inline uint32_t Get_NB32_index_wait_DCT(uint32_t dev, uint8_t dct, uint32_t index_reg, uint32_t index)
++{
++ if (is_fam15h()) {
++ /* Obtain address of function 0x1 */
++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12);
++ fam15h_switch_dct(dev_map, dct);
++ return Get_NB32_index_wait(dev, index_reg, index);
++ } else {
++ return Get_NB32_index_wait(dev, (0x100 * dct) + index_reg, index);
++ }
++}
++
++static inline void Set_NB32_index_wait_DCT(uint32_t dev, uint8_t dct, uint32_t index_reg, uint32_t index, uint32_t data)
++{
++ if (is_fam15h()) {
++ /* Obtain address of function 0x1 */
++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12);
++ fam15h_switch_dct(dev_map, dct);
++ Set_NB32_index_wait(dev, index_reg, index, data);
++ } else {
++ Set_NB32_index_wait(dev, (0x100 * dct) + index_reg, index, data);
++ }
++}
++
++static uint16_t voltage_index_to_mv(uint8_t index)
++{
++ if (index & 0x8)
++ return 1150;
++ if (index & 0x4)
++ return 1250;
++ else if (index & 0x2)
++ return 1350;
++ else
++ return 1500;
++}
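++/*
++ * The voltage index is a bitmask rather than an enumeration: bit 0 maps to
++ * 1.50V, bit 1 to 1.35V, bit 2 to 1.25V and bit 3 to 1.15V, with the
++ * lower-voltage bits taking precedence when several are set. For example,
++ * voltage_index_to_mv(0x2) returns 1350.
++ */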
++
++static uint16_t mct_MaxLoadFreq(uint8_t count, uint8_t highest_rank_count, uint8_t registered, uint8_t voltage, uint16_t freq)
++{
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
+ /* Return limited maximum RAM frequency */
+ if (IS_ENABLED(CONFIG_DIMM_DDR2)) {
+ if (IS_ENABLED(CONFIG_DIMM_REGISTERED) && registered) {
+@@ -68,34 +180,178 @@ static uint16_t mct_MaxLoadFreq(uint8_t count, uint8_t registered, uint16_t freq
+ }
+ }
+ } else if (IS_ENABLED(CONFIG_DIMM_DDR3)) {
+- if (IS_ENABLED(CONFIG_DIMM_REGISTERED) && registered) {
+- /* K10 BKDG Rev. 3.62 Table 34 */
+- if (count > 2) {
+- /* Limit to DDR3-800 */
+- if (freq > 400) {
+- freq = 400;
+- print_tf(__func__, ": More than 2 registered DIMMs on channel; limiting to DDR3-800\n");
++ if (voltage == 0) {
++ printk(BIOS_DEBUG, "%s: WARNING: Mainboard DDR3 voltage unknown, assuming 1.5V!\n", __func__);
++ voltage = 0x1;
++ }
++
++ if (is_fam15h()) {
++ if (IS_ENABLED(CONFIG_DIMM_REGISTERED) && registered) {
++ /* Fam15h BKDG Rev. 3.14 Table 27 */
++ if (voltage & 0x4) {
++ /* 1.25V */
++ if (count > 1) {
++ if (highest_rank_count > 1) {
++ /* Limit to DDR3-1066 */
++ if (freq > 533) {
++ freq = 533;
++ printk(BIOS_DEBUG, "%s: More than 1 registered DIMM on %dmV channel; limiting to DDR3-1066\n", __func__, voltage_index_to_mv(voltage));
++ }
++ } else {
++ /* Limit to DDR3-1333 */
++ if (freq > 666) {
++ freq = 666;
++ printk(BIOS_DEBUG, "%s: More than 1 registered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ } else {
++ /* Limit to DDR3-1333 */
++ if (freq > 666) {
++ freq = 666;
++ printk(BIOS_DEBUG, "%s: 1 registered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ } else if (voltage & 0x2) {
++ /* 1.35V */
++ if (count > 1) {
++ /* Limit to DDR3-1333 */
++ if (freq > 666) {
++ freq = 666;
++ printk(BIOS_DEBUG, "%s: More than 1 registered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage));
++ }
++ } else {
++ /* Limit to DDR3-1600 */
++ if (freq > 800) {
++ freq = 800;
++ printk(BIOS_DEBUG, "%s: 1 registered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ } else if (voltage & 0x1) {
++ /* 1.50V */
++ if (count > 1) {
++ /* Limit to DDR3-1600 */
++ if (freq > 800) {
++ freq = 800;
++ printk(BIOS_DEBUG, "%s: More than 1 registered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage));
++ }
++ } else {
++ /* Limit to DDR3-1866 */
++ if (freq > 933) {
++ freq = 933;
++ printk(BIOS_DEBUG, "%s: 1 registered DIMM on %dmV channel; limiting to DDR3-1866\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ }
++ } else {
++ /* Fam15h BKDG Rev. 3.14 Table 26 */
++ if (voltage & 0x4) {
++ /* 1.25V */
++ if (count > 1) {
++ if (highest_rank_count > 1) {
++ /* Limit to DDR3-1066 */
++ if (freq > 533) {
++ freq = 533;
++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1066\n", __func__, voltage_index_to_mv(voltage));
++ }
++ } else {
++ /* Limit to DDR3-1333 */
++ if (freq > 666) {
++ freq = 666;
++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ } else {
++ /* Limit to DDR3-1333 */
++ if (freq > 666) {
++ freq = 666;
++ printk(BIOS_DEBUG, "%s: 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ } else if (voltage & 0x2) {
++ /* 1.35V */
++ if (MaxDimmsInstallable > 1) {
++ /* Limit to DDR3-1333 */
++ if (freq > 666) {
++ freq = 666;
++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage));
++ }
++ } else {
++ /* Limit to DDR3-1600 */
++ if (freq > 800) {
++ freq = 800;
++ printk(BIOS_DEBUG, "%s: 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ } else if (voltage & 0x1) {
++ if (MaxDimmsInstallable == 1) {
++ if (count > 1) {
++ /* Limit to DDR3-1600 */
++ if (freq > 800) {
++ freq = 800;
++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage));
++ }
++ } else {
++ /* Limit to DDR3-1866 */
++ if (freq > 933) {
++ freq = 933;
++ printk(BIOS_DEBUG, "%s: 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1866\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ } else {
++ if (count > 1) {
++ if (highest_rank_count > 1) {
++ /* Limit to DDR3-1333 */
++ if (freq > 666) {
++ freq = 666;
++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage));
++ }
++ } else {
++ /* Limit to DDR3-1600 */
++ if (freq > 800) {
++ freq = 800;
++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ } else {
++ /* Limit to DDR3-1600 */
++ if (freq > 800) {
++ freq = 800;
++ printk(BIOS_DEBUG, "%s: 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage));
++ }
++ }
++ }
+ }
+- } else if (count == 2) {
+- /* Limit to DDR3-1066 */
+- if (freq > 533) {
+- freq = 533;
+- print_tf(__func__, ": 2 registered DIMMs on channel; limiting to DDR3-1066\n");
++ }
++ } else {
++ if (IS_ENABLED(CONFIG_DIMM_REGISTERED) && registered) {
++ /* K10 BKDG Rev. 3.62 Table 34 */
++ if (count > 2) {
++ /* Limit to DDR3-800 */
++ if (freq > 400) {
++ freq = 400;
++ printk(BIOS_DEBUG, "%s: More than 2 registered DIMMs on %dmV channel; limiting to DDR3-800\n", __func__, voltage_index_to_mv(voltage));
++ }
++ } else if (count == 2) {
++ /* Limit to DDR3-1066 */
++ if (freq > 533) {
++ freq = 533;
++ printk(BIOS_DEBUG, "%s: 2 registered DIMMs on %dmV channel; limiting to DDR3-1066\n", __func__, voltage_index_to_mv(voltage));
++ }
++ } else {
++ /* Limit to DDR3-1333 */
++ if (freq > 666) {
++ freq = 666;
++ printk(BIOS_DEBUG, "%s: 1 registered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage));
++ }
+ }
+ } else {
++ /* K10 BKDG Rev. 3.62 Table 33 */
+ /* Limit to DDR3-1333 */
+ if (freq > 666) {
+ freq = 666;
+- print_tf(__func__, ": 1 registered DIMM on channel; limiting to DDR3-1333\n");
++ printk(BIOS_DEBUG, "%s: unbuffered DIMMs on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage));
+ }
+ }
+- } else {
+- /* K10 BKDG Rev. 3.62 Table 33 */
+- /* Limit to DDR3-1333 */
+- if (freq > 666) {
+- freq = 666;
+- print_tf(__func__, ": unbuffered DIMMs on channel; limiting to DDR3-1333\n");
+- }
+ }
+ }
+
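++	/* Worked example (DIMM population assumed for illustration): on Fam15h
++	 * with two registered DIMMs on a 1.5V channel, the limits above cap the
++	 * channel at DDR3-1600, so mct_MaxLoadFreq() returns 800 for a
++	 * DDR3-1866 (933) request. */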
+@@ -225,11 +481,13 @@ void mctGet_DIMMAddr(struct DCTStatStruc *pDCTstat, u32 node)
+
+ }
+
++#if IS_ENABLED(CONFIG_SET_FIDVID)
+ static u8 mctGetProcessorPackageType(void) {
+ /* FIXME: I guess this belongs wherever mctGetLogicalCPUID ends up ? */
+- u32 BrandId = cpuid_ebx(0x80000001);
+- return (u8)((BrandId >> 28) & 0x0F);
++ u32 BrandId = cpuid_ebx(0x80000001);
++ return (u8)((BrandId >> 28) & 0x0F);
+ }
++#endif
+
+ static void raminit_amdmct(struct sys_info *sysinfo)
+ {
+diff --git a/src/northbridge/amd/amdht/h3ncmn.c b/src/northbridge/amd/amdht/h3ncmn.c
+index 97f9db8..8f9177f 100644
+--- a/src/northbridge/amd/amdht/h3ncmn.c
++++ b/src/northbridge/amd/amdht/h3ncmn.c
+@@ -43,6 +43,7 @@
+ #define CPU_HTNB_FUNC_04 4
+ #define CPU_ADDR_FUNC_01 1
+ #define CPU_NB_FUNC_03 3
++#define CPU_NB_FUNC_05 5
+
+ /* Function 0 registers */
+ #define REG_ROUTE0_0X40 0x40
+@@ -70,6 +71,7 @@
+ #define REG_NB_CPUID_3XFC 0xFC
+ #define REG_NB_LINK_XCS_TOKEN0_3X148 0x148
+ #define REG_NB_DOWNCORE_3X190 0x190
++#define REG_NB_CAPABILITY_5X84 0x84
+
+ /* Function 4 registers */
+
+@@ -555,9 +557,10 @@ static u8 fam10GetNumCoresOnNode(u8 node, cNorthBridge *nb)
+ 15, 12, &temp);
+
+ /* bits[15,13,12] specify the cores */
+- /* Support Downcoring */
+ temp = ((temp & 8) >> 1) + (temp & 3);
+ cores = temp + 1;
++
++ /* Support Downcoring */
+ AmdPCIReadBits (MAKE_SBDFO(makePCISegmentFromNode(node),
+ makePCIBusFromNode(node),
+ makePCIDeviceFromNode(node),
+@@ -576,6 +579,56 @@ static u8 fam10GetNumCoresOnNode(u8 node, cNorthBridge *nb)
+
+ /***************************************************************************//**
+ *
++ * static u8
++ * fam15GetNumCoresOnNode(u8 node, cNorthBridge *nb)
++ *
++ * Description:
++ * Return the number of cores (1 based count) on node.
++ *
++ * Parameters:
++ * @param[in] node = the node that will be examined
++ * @param[in] *nb = this northbridge
++ * @return = the number of cores
++ *
++ *
++ */
++static u8 fam15GetNumCoresOnNode(u8 node, cNorthBridge *nb)
++{
++ u32 temp, leveling, cores;
++ u8 i;
++
++ ASSERT((node < nb->maxNodes));
++ /* Read CmpCap [7:0] */
++ AmdPCIReadBits(MAKE_SBDFO(makePCISegmentFromNode(node),
++ makePCIBusFromNode(node),
++ makePCIDeviceFromNode(node),
++ CPU_NB_FUNC_05,
++ REG_NB_CAPABILITY_5X84),
++ 7, 0, &temp);
++
++ /* bits[7:0] specify the cores */
++ temp = temp & 0xff;
++ cores = temp + 1;
++
++ /* Support Downcoring */
++ AmdPCIReadBits (MAKE_SBDFO(makePCISegmentFromNode(node),
++ makePCIBusFromNode(node),
++ makePCIDeviceFromNode(node),
++ CPU_NB_FUNC_03,
++ REG_NB_DOWNCORE_3X190),
++ 31, 0, &leveling);
++ for (i=0; i<cores; i++)
++ {
++ if (leveling & ((u32) 1 << i))
++ {
++ temp--;
++ }
++ }
++ return (u8)(temp+1);
++}
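++/*
++ * Example for fam15GetNumCoresOnNode (CmpCap value assumed): a 16-core node
++ * reports CmpCap = 0x0f, so cores = 16; if the downcore register then has
++ * four bits set within that range, the function returns 16 - 4 = 12 enabled
++ * cores.
++ */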
++
++/***************************************************************************//**
++ *
+ * static void
+ * setTotalNodesAndCores(u8 node, u8 totalNodes, u8 totalCores, cNorthBridge *nb)
+ *
+@@ -854,6 +907,69 @@ static BOOL fam10IsCapable(u8 node, sMainData *pDat, cNorthBridge *nb)
+
+ /***************************************************************************//**
+ *
++ * static BOOL
++ * fam15IsCapable(u8 node, sMainData *pDat, cNorthBridge *nb)
++ *
++ * Description:
++ * Get node capability and update the minimum supported system capability.
++ * Return whether the system is capable of the current configuration.
++ *
++ * Parameters:
++ * @param[in] node = the node
++ * @param[in,out] *pDat = sysMpCap (updated) and NodesDiscovered
++ * @param[in] *nb = this northbridge
++ * @return true: system is capable of current config.
++ * false: system is not capable of current config.
++ *
++ * ---------------------------------------------------------------------------------------
++ */
++static BOOL fam15IsCapable(u8 node, sMainData *pDat, cNorthBridge *nb)
++{
++#ifndef HT_BUILD_NC_ONLY
++ u32 temp;
++ u8 maxNodes;
++
++ ASSERT(node < nb->maxNodes);
++
++ AmdPCIReadBits(MAKE_SBDFO(makePCISegmentFromNode(node),
++ makePCIBusFromNode(node),
++ makePCIDeviceFromNode(node),
++ CPU_NB_FUNC_03,
++ REG_NB_CAPABILITY_3XE8),
++ 18, 16, &temp);
++
++ if (temp != 0)
++ {
++ maxNodes = (1 << (~temp & 0x3)); /* That is, 1, 2, 4, or 8 */
++ }
++ else
++ {
++ /* Check if CPU package is dual node */
++ AmdPCIReadBits(MAKE_SBDFO(makePCISegmentFromNode(node),
++ makePCIBusFromNode(node),
++ makePCIDeviceFromNode(node),
++ CPU_NB_FUNC_03,
++ REG_NB_CAPABILITY_3XE8),
++ 29, 29, &temp);
++ if (temp)
++ maxNodes = 4;
++ else
++ maxNodes = 8;
++ }
++
++ if (pDat->sysMpCap > maxNodes)
++ {
++ pDat->sysMpCap = maxNodes;
++ }
++ /* Note since sysMpCap is one based and NodesDiscovered is zero based, equal is false */
++ return (pDat->sysMpCap > pDat->NodesDiscovered);
++#else
++ return 1;
++#endif
++}
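++/*
++ * Example for fam15IsCapable (field value assumed): if F3xE8[18:16] reads 2,
++ * maxNodes becomes 1 << (~2 & 0x3) = 2, so once two or more additional nodes
++ * have already been discovered the function reports the configuration as not
++ * capable.
++ */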
++
++/***************************************************************************//**
++ *
+ * static void
+ * fam0fStopLink(u8 currentNode, u8 currentLink, cNorthBridge *nb)
+ *
+@@ -2068,6 +2184,49 @@ void newNorthBridge(u8 node, cNorthBridge *nb)
+ u32 match;
+ u32 extFam, baseFam, model;
+
++ cNorthBridge fam15 =
++ {
++#ifdef HT_BUILD_NC_ONLY
++ 8,
++ 1,
++ 12,
++#else
++ 8,
++ 8,
++ 64,
++#endif /* HT_BUILD_NC_ONLY*/
++ writeRoutingTable,
++ writeNodeID,
++ readDefLnk,
++ enableRoutingTables,
++ verifyLinkIsCoherent,
++ readTrueLinkFailStatus,
++ readToken,
++ writeToken,
++ fam15GetNumCoresOnNode,
++ setTotalNodesAndCores,
++ limitNodes,
++ writeFullRoutingTable,
++ isCompatible,
++ fam15IsCapable,
++ (void (*)(u8, u8, cNorthBridge*))commonVoid,
++ (BOOL (*)(u8, u8, sMainData*, cNorthBridge*))commonReturnFalse,
++ readSbLink,
++ verifyLinkIsNonCoherent,
++ ht3SetCFGAddrMap,
++ convertBitsToWidth,
++ convertWidthToBits,
++ fam10NorthBridgeFreqMask,
++ gatherLinkData,
++ setLinkData,
++ ht3WriteTrafficDistribution,
++ fam10BufferOptimizations,
++ 0x00000001,
++ 0x00000200,
++ 18,
++ 0x00000f06
++ };
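++	/* The fam15 descriptor above plugs the Fam15h-specific core counting and
++	 * capability routines into the existing common helpers; its compatibleKey
++	 * of 0x0f06 matches the (baseFam << 8) | extFam value computed below for
++	 * base family 0xF, extended family 0x06 parts. */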
++
+ cNorthBridge fam10 =
+ {
+ #ifdef HT_BUILD_NC_ONLY
+@@ -2175,8 +2334,14 @@ void newNorthBridge(u8 node, cNorthBridge *nb)
+ 7, 4, &model);
+ match = (u32)((baseFam << 8) | extFam);
+
+- /* Test each in turn looking for a match. Init the struct if found */
+- if (match == fam10.compatibleKey)
++ /* Test each in turn looking for a match.
++ * Initialize the struct if found.
++ */
++ if (match == fam15.compatibleKey)
++ {
++ Amdmemcpy((void *)nb, (const void *)&fam15, (u32) sizeof(cNorthBridge));
++ }
++ else if (match == fam10.compatibleKey)
+ {
+ Amdmemcpy((void *)nb, (const void *)&fam10, (u32) sizeof(cNorthBridge));
+ }
+diff --git a/src/northbridge/amd/amdht/ht_wrapper.c b/src/northbridge/amd/amdht/ht_wrapper.c
+index 389b1b1..c0ccc69 100644
+--- a/src/northbridge/amd/amdht/ht_wrapper.c
++++ b/src/northbridge/amd/amdht/ht_wrapper.c
+@@ -174,16 +174,22 @@ void amd_ht_fixup(struct sys_info *sysinfo) {
+ printk(BIOS_DEBUG, "amd_ht_fixup()\n");
+ if (IS_ENABLED(CONFIG_CPU_AMD_MODEL_10XXX)) {
+ uint8_t rev_gte_d = 0;
++ uint8_t fam15h = 0;
+ uint8_t dual_node = 0;
+ uint32_t f3xe8;
+ uint32_t family;
+ uint32_t model;
+
+ family = model = cpuid_eax(0x80000001);
+- model = ((model & 0xf0000) >> 16) | ((model & 0xf0) >> 4);
++ model = ((model & 0xf0000) >> 12) | ((model & 0xf0) >> 4);
++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
+
+- if (model >= 0x8)
+- /* Revision D or later */
++ if (family >= 0x6f)
++ /* Family 15h or later */
++ fam15h = 1;
++
++ if ((model >= 0x8) || fam15h)
++ /* Family 10h Revision D or later */
+ rev_gte_d = 1;
+
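++		/* Example (CPUID value assumed for illustration): eax = 0x00600f12
++		 * from CPUID 0x80000001 unpacks to family = 0x6f and model = 0x01
++		 * with the shifts above, so both fam15h and rev_gte_d are set. */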
+ if (rev_gte_d) {
+@@ -195,7 +201,8 @@ void amd_ht_fixup(struct sys_info *sysinfo) {
+
+ if (dual_node) {
+ /* Each G34 processor contains a defective HT link.
+- * See the BKDG Rev 3.62 section 2.7.1.5 for details.
++ * See the Family 10h BKDG Rev. 3.62 section 2.7.1.5 for details.
++ * For Family 15h, see the BKDG Rev. 3.14 section 2.12.1.5 for details.
+ */
+ uint8_t node;
+ uint8_t node_count = get_nodes();
+@@ -205,46 +212,46 @@ void amd_ht_fixup(struct sys_info *sysinfo) {
+ uint8_t internal_node_number = ((f3xe8 & 0xc0000000) >> 30);
+ printk(BIOS_DEBUG, "amd_ht_fixup(): node %d (internal node ID %d): disabling defective HT link\n", node, internal_node_number);
+ if (internal_node_number == 0) {
+- uint8_t package_link_3_connected = pci_read_config32(NODE_PCI(node, 0), 0xd8) & 0x1;
++ uint8_t package_link_3_connected = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0x98:0xd8) & 0x1;
+ if (package_link_3_connected) {
+ /* Set WidthIn and WidthOut to 0 */
+- dword = pci_read_config32(NODE_PCI(node, 0), 0xc4);
++ dword = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0x84:0xc4);
+ dword &= ~0x77000000;
+- pci_write_config32(NODE_PCI(node, 0), 0xc4, dword);
++ pci_write_config32(NODE_PCI(node, 0), (fam15h)?0x84:0xc4, dword);
+ /* Set Ganged to 1 */
+- dword = pci_read_config32(NODE_PCI(node, 0), 0x178);
++ dword = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0x170:0x178);
+ dword |= 0x00000001;
+- pci_write_config32(NODE_PCI(node, 0), 0x178, dword);
++ pci_write_config32(NODE_PCI(node, 0), (fam15h)?0x170:0x178, dword);
+ } else {
+ /* Set ConnDly to 1 */
+ dword = pci_read_config32(NODE_PCI(node, 0), 0x16c);
+ dword |= 0x00000100;
+ pci_write_config32(NODE_PCI(node, 0), 0x16c, dword);
+ /* Set TransOff and EndOfChain to 1 */
+- dword = pci_read_config32(NODE_PCI(node, 4), 0xc4);
++ dword = pci_read_config32(NODE_PCI(node, 4), (fam15h)?0x84:0xc4);
+ dword |= 0x000000c0;
+- pci_write_config32(NODE_PCI(node, 4), 0xc4, dword);
++ pci_write_config32(NODE_PCI(node, 4), (fam15h)?0x84:0xc4, dword);
+ }
+ } else if (internal_node_number == 1) {
+- uint8_t package_link_3_connected = pci_read_config32(NODE_PCI(node, 0), 0xb8) & 0x1;
++ uint8_t package_link_3_connected = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0xf8:0xb8) & 0x1;
+ if (package_link_3_connected) {
+ /* Set WidthIn and WidthOut to 0 */
+- dword = pci_read_config32(NODE_PCI(node, 0), 0xa4);
++ dword = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0xe4:0xa4);
+ dword &= ~0x77000000;
+- pci_write_config32(NODE_PCI(node, 0), 0xa4, dword);
++ pci_write_config32(NODE_PCI(node, 0), (fam15h)?0xe4:0xa4, dword);
+ /* Set Ganged to 1 */
+- dword = pci_read_config32(NODE_PCI(node, 0), 0x174);
++ dword = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0x18c:0x174);
+ dword |= 0x00000001;
+- pci_write_config32(NODE_PCI(node, 0), 0x174, dword);
++ pci_write_config32(NODE_PCI(node, 0), (fam15h)?0x18c:0x174, dword);
+ } else {
+ /* Set ConnDly to 1 */
+ dword = pci_read_config32(NODE_PCI(node, 0), 0x16c);
+ dword |= 0x00000100;
+ pci_write_config32(NODE_PCI(node, 0), 0x16c, dword);
+ /* Set TransOff and EndOfChain to 1 */
+- dword = pci_read_config32(NODE_PCI(node, 4), 0xa4);
++ dword = pci_read_config32(NODE_PCI(node, 4), (fam15h)?0xe4:0xa4);
+ dword |= 0x000000c0;
+- pci_write_config32(NODE_PCI(node, 4), 0xa4, dword);
++ pci_write_config32(NODE_PCI(node, 4), (fam15h)?0xe4:0xa4, dword);
+ }
+ }
+ }
+diff --git a/src/northbridge/amd/amdmct/amddefs.h b/src/northbridge/amd/amdmct/amddefs.h
+index 117fea5..20a77d3 100644
+--- a/src/northbridge/amd/amdmct/amddefs.h
++++ b/src/northbridge/amd/amdmct/amddefs.h
+@@ -20,33 +20,35 @@
+ /* FIXME: this file should be moved to include/cpu/amd/amddefs.h */
+
+ /* Public Revisions - USE THESE VERSIONS TO MAKE COMPARE WITH CPULOGICALID RETURN VALUE*/
+-#define AMD_SAFEMODE 0x80000000 /* Unknown future revision - SAFE MODE */
+-#define AMD_NPT_F0 0x00000001 /* F0 stepping */
+-#define AMD_NPT_F1 0x00000002 /* F1 stepping */
+-#define AMD_NPT_F2C 0x00000004
+-#define AMD_NPT_F2D 0x00000008
+-#define AMD_NPT_F2E 0x00000010 /* F2 stepping E */
+-#define AMD_NPT_F2G 0x00000020 /* F2 stepping G */
+-#define AMD_NPT_F2J 0x00000040
+-#define AMD_NPT_F2K 0x00000080
+-#define AMD_NPT_F3L 0x00000100 /* F3 Stepping */
+-#define AMD_NPT_G0A 0x00000200 /* G0 stepping */
+-#define AMD_NPT_G1B 0x00000400 /* G1 stepping */
+-#define AMD_DR_A0A 0x00010000 /* Barcelona A0 */
+-#define AMD_DR_A1B 0x00020000 /* Barcelona A1 */
+-#define AMD_DR_A2 0x00040000 /* Barcelona A2 */
+-#define AMD_DR_B0 0x00080000 /* Barcelona B0 */
+-#define AMD_DR_B1 0x00100000 /* Barcelona B1 */
+-#define AMD_DR_B2 0x00200000 /* Barcelona B2 */
+-#define AMD_DR_BA 0x00400000 /* Barcelona BA */
+-#define AMD_DR_B3 0x00800000 /* Barcelona B3 */
+-#define AMD_RB_C2 0x01000000 /* Shanghai C2 */
+-#define AMD_DA_C2 0x02000000 /* XXXX C2 */
+-#define AMD_HY_D0 0x04000000 /* Istanbul D0 */
+-#define AMD_RB_C3 0x08000000 /* ??? C3 */
+-#define AMD_DA_C3 0x10000000 /* XXXX C3 */
+-#define AMD_HY_D1 0x20000000 /* Istanbul D1 */
+-#define AMD_PH_E0 0x40000000 /* Phenom II X4 X6 */
++#define AMD_SAFEMODE 0x8000000000000000 /* Unknown future revision - SAFE MODE */
++#define AMD_NPT_F0 0x0000000000000001 /* F0 stepping */
++#define AMD_NPT_F1 0x0000000000000002 /* F1 stepping */
++#define AMD_NPT_F2C 0x0000000000000004
++#define AMD_NPT_F2D 0x0000000000000008
++#define AMD_NPT_F2E 0x0000000000000010 /* F2 stepping E */
++#define AMD_NPT_F2G 0x0000000000000020 /* F2 stepping G */
++#define AMD_NPT_F2J 0x0000000000000040
++#define AMD_NPT_F2K 0x0000000000000080
++#define AMD_NPT_F3L 0x0000000000000100 /* F3 Stepping */
++#define AMD_NPT_G0A 0x0000000000000200 /* G0 stepping */
++#define AMD_NPT_G1B 0x0000000000000400 /* G1 stepping */
++#define AMD_DR_A0A 0x0000000000010000 /* Barcelona A0 */
++#define AMD_DR_A1B 0x0000000000020000 /* Barcelona A1 */
++#define AMD_DR_A2 0x0000000000040000 /* Barcelona A2 */
++#define AMD_DR_B0 0x0000000000080000 /* Barcelona B0 */
++#define AMD_DR_B1 0x0000000000100000 /* Barcelona B1 */
++#define AMD_DR_B2 0x0000000000200000 /* Barcelona B2 */
++#define AMD_DR_BA 0x0000000000400000 /* Barcelona BA */
++#define AMD_DR_B3 0x0000000000800000 /* Barcelona B3 */
++#define AMD_RB_C2 0x0000000001000000 /* Shanghai C2 */
++#define AMD_DA_C2 0x0000000002000000 /* XXXX C2 */
++#define AMD_HY_D0 0x0000000004000000 /* Istanbul D0 */
++#define AMD_RB_C3 0x0000000008000000 /* ??? C3 */
++#define AMD_DA_C3 0x0000000010000000 /* XXXX C3 */
++#define AMD_HY_D1 0x0000000020000000 /* Istanbul D1 */
++#define AMD_PH_E0 0x0000000040000000 /* Phenom II X4 X6 */
++#define AMD_OR_B2 0x0000000080000000 /* Interlagos */
++#define AMD_OR_C0 0x0000000100000000 /* Abu Dhabi */
+
+ /*
+ * Groups - Create as many as you wish, from the above public values
+@@ -76,6 +78,7 @@
+ #define AMD_DRBH_Cx (AMD_DR_Cx | AMD_HY_D0 )
+ #define AMD_DRBA23_RBC2 (AMD_DR_BA | AMD_DR_B2 | AMD_DR_B3 | AMD_RB_C2 )
+ #define AMD_DR_DAC2_OR_C3 (AMD_DA_C2 | AMD_DA_C3 | AMD_RB_C3)
++#define AMD_FAM15_ALL (AMD_OR_B2 | AMD_OR_C0)
+
+ /*
+ * Public Platforms - USE THESE VERSIONS TO MAKE COMPARE WITH CPUPLATFORMTYPE RETURN VALUE
+@@ -122,23 +125,34 @@
+ */
+ #define CPUID_EXT_PM 0x80000007
+ #define CPUID_MODEL 1
+-#define MCG_CAP 0x00000179
++#define MCG_CAP 0x00000179
+ #define MCG_CTL_P 8
+-#define MC0_CTL 0x00000400
+-#define MC0_STA MC0_CTL + 1
+-#define FS_Base 0xC0000100
++#define MC0_CTL 0x00000400
++#define MC0_STA (MC0_CTL + 1)
++#define MC4_MISC0 0x00000413
++#define MC4_MISC1 0xC0000408
++#define MC4_MISC2 0xC0000409
++#define FS_Base 0xC0000100
+ #define SYSCFG 0xC0010010
+ #define HWCR 0xC0010015
+ #define NB_CFG 0xC001001F
+ #define FidVidStatus 0xC0010042
++#define MC1_CTL_MASK 0xC0010045
+ #define MC4_CTL_MASK 0xC0010048
+ #define OSVW_ID_Length 0xC0010140
+ #define OSVW_Status 0xC0010141
+ #define CPUIDFEATURES 0xC0011004
+ #define LS_CFG 0xC0011020
++#define IC_CFG 0xC0011021
+ #define DC_CFG 0xC0011022
+ #define BU_CFG 0xC0011023
+-#define BU_CFG2 0xC001102A
++#define FP_CFG 0xC0011028
++#define DE_CFG 0xC0011029
++#define BU_CFG2 0xC001102A
++#define BU_CFG3 0xC001102B
++#define EX_CFG 0xC001102C
++#define LS_CFG2 0xC001102D
++#define IBS_OP_DATA3 0xC0011037
+
+ /*
+ * Processor package types
+diff --git a/src/northbridge/amd/amdmct/mct/mct_d.c b/src/northbridge/amd/amdmct/mct/mct_d.c
+index 88910e2..be0af65 100644
+--- a/src/northbridge/amd/amdmct/mct/mct_d.c
++++ b/src/northbridge/amd/amdmct/mct/mct_d.c
+@@ -2189,6 +2189,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat->DimmManufacturerID[i] |= ((uint64_t)mctRead_SPD(smbaddr, SPD_MANID_START + k)) << (k * 8);
+ for (k = 0; k < SPD_PARTN_LENGTH; k++)
+ pDCTstat->DimmPartNumber[i][k] = mctRead_SPD(smbaddr, SPD_PARTN_START + k);
++ pDCTstat->DimmPartNumber[i][SPD_PARTN_LENGTH] = 0;
+ pDCTstat->DimmRevisionNumber[i] = 0;
+ for (k = 0; k < 2; k++)
+ pDCTstat->DimmRevisionNumber[i] |= ((uint16_t)mctRead_SPD(smbaddr, SPD_REVNO_START + k)) << (k * 8);
+@@ -2206,8 +2207,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
+ if (byte & JED_REGADCMSK) {
+ RegDIMMPresent |= 1 << i;
+ pDCTstat->DimmRegistered[i] = 1;
+- }
+- else {
++ } else {
+ pDCTstat->DimmRegistered[i] = 0;
+ }
+ /* Check ECC capable */
+diff --git a/src/northbridge/amd/amdmct/mct/mct_d.h b/src/northbridge/amd/amdmct/mct/mct_d.h
+index 132bdc9..6b6194d 100644
+--- a/src/northbridge/amd/amdmct/mct/mct_d.h
++++ b/src/northbridge/amd/amdmct/mct/mct_d.h
+@@ -434,7 +434,7 @@ struct DCTStatStruc { /* A per Node structure*/
+ /* CH A byte lane 0 - 7 maximum filtered window passing DQS delay value*/
+ /* CH B byte lane 0 - 7 minimum filtered window passing DQS delay value*/
+ /* CH B byte lane 0 - 7 maximum filtered window passing DQS delay value*/
+- u32 LogicalCPUID; /* The logical CPUID of the node*/
++ uint64_t LogicalCPUID; /* The logical CPUID of the node*/
+ u16 HostBiosSrvc1; /* Word sized general purpose field for use by host BIOS. Scratch space.*/
+ u32 HostBiosSrvc2; /* Dword sized general purpose field for use by host BIOS. Scratch space.*/
+ u16 DimmQRPresent; /* QuadRank DIMM present?*/
+@@ -529,7 +529,7 @@ struct DCTStatStruc { /* A per Node structure*/
+ uint8_t DimmRegistered[MAX_DIMMS_SUPPORTED];
+
+ uint64_t DimmManufacturerID[MAX_DIMMS_SUPPORTED];
+- char DimmPartNumber[MAX_DIMMS_SUPPORTED][SPD_PARTN_LENGTH];
++ char DimmPartNumber[MAX_DIMMS_SUPPORTED][SPD_PARTN_LENGTH+1];
+ uint16_t DimmRevisionNumber[MAX_DIMMS_SUPPORTED];
+ uint32_t DimmSerialNumber[MAX_DIMMS_SUPPORTED];
+ } __attribute__((packed));
+@@ -598,17 +598,18 @@ struct DCTStatStruc { /* A per Node structure*/
+ 266=266MHz (DDR533)
+ 333=333MHz (DDR667)
+ 400=400MHz (DDR800)*/
+-#define NV_ECC_CAP 4 /* Bus ECC capable (1-bits)
++#define NV_MIN_MEMCLK 4 /* Minimum platform demonstrated Memclock (10-bits) */
++#define NV_ECC_CAP 5 /* Bus ECC capable (1-bits)
+ 0=Platform not capable
+ 1=Platform is capable*/
+-#define NV_4RANKType 5 /* Quad Rank DIMM slot type (2-bits)
++#define NV_4RANKType 6 /* Quad Rank DIMM slot type (2-bits)
+ 0=Normal
+ 1=R4 (4-Rank Registered DIMMs in AMD server configuration)
+ 2=S4 (Unbuffered SO-DIMMs)*/
+-#define NV_BYPMAX 6 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition).
++#define NV_BYPMAX 7 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition).
+ 4=4 times bypass (normal for non-UMA systems)
+ 7=7 times bypass (normal for UMA systems)*/
+-#define NV_RDWRQBYP 7 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition).
++#define NV_RDWRQBYP 8 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition).
+ 2=8 times (normal for non-UMA systems)
+ 3=16 times (normal for UMA systems)*/
+
+@@ -671,8 +672,9 @@ struct DCTStatStruc { /* A per Node structure*/
+ #define NV_ECCRedir 54 /* Dram ECC Redirection enable*/
+ #define NV_DramBKScrub 55 /* Dram ECC Background Scrubber CTL*/
+ #define NV_L2BKScrub 56 /* L2 ECC Background Scrubber CTL*/
+-#define NV_DCBKScrub 57 /* DCache ECC Background Scrubber CTL*/
+-#define NV_CS_SpareCTL 58 /* Chip Select Spare Control bit 0:
++#define NV_L3BKScrub 57 /* L3 ECC Background Scrubber CTL*/
++#define NV_DCBKScrub 58 /* DCache ECC Background Scrubber CTL*/
++#define NV_CS_SpareCTL 59 /* Chip Select Spare Control bit 0:
+ 0=disable Spare
+ 1=enable Spare */
+ /* Chip Select Spare Control bit 1-4:
+@@ -712,7 +714,7 @@ u8 mct_Get_Start_RcvrEnDly_1Pass(u8 Pass);
+ u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass);
+ void CPUMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
+ void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
+-u32 mctGetLogicalCPUID(u32 Node);
++uint64_t mctGetLogicalCPUID(u32 Node);
+ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
+ void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, u8 Pass);
+ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
+diff --git a/src/northbridge/amd/amdmct/mct/mctpro_d.c b/src/northbridge/amd/amdmct/mct/mctpro_d.c
+index c332357..fe56201 100644
+--- a/src/northbridge/amd/amdmct/mct/mctpro_d.c
++++ b/src/northbridge/amd/amdmct/mct/mctpro_d.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -23,7 +24,7 @@ void EarlySampleSupport_D(void)
+
+ u32 procOdtWorkaround(struct DCTStatStruc *pDCTstat, u32 dct, u32 val)
+ {
+- u32 tmp;
++ uint64_t tmp;
+ tmp = pDCTstat->LogicalCPUID;
+ if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+ val &= 0x0FFFFFFF;
+@@ -42,7 +43,7 @@ u32 OtherTiming_A_D(struct DCTStatStruc *pDCTstat, u32 val)
+ * ( F2x[1, 0]8C[1:0] > 00b). Silicon Status: Fixed in Rev B
+ * FIXME: check if this is still required.
+ */
+- u32 tmp;
++ uint64_t tmp;
+ tmp = pDCTstat->LogicalCPUID;
+ if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+ if(!(val & (3<<12) ))
+@@ -54,7 +55,7 @@ u32 OtherTiming_A_D(struct DCTStatStruc *pDCTstat, u32 val)
+
+ void mct_ForceAutoPrecharge_D(struct DCTStatStruc *pDCTstat, u32 dct)
+ {
+- u32 tmp;
++ uint64_t tmp;
+ u32 reg;
+ u32 reg_off;
+ u32 dev;
+@@ -96,7 +97,7 @@ void mct_EndDQSTraining_D(struct MCTStatStruc *pMCTstat,
+ * FIXME: check this.
+ */
+
+- u32 tmp;
++ uint64_t tmp;
+ u32 dev;
+ u32 reg;
+ u32 val;
+@@ -143,10 +144,9 @@ void mct_BeforeDQSTrain_Samp_D(struct MCTStatStruc *pMCTstat,
+ u32 index;
+ u32 reg;
+ u32 val;
+- u32 tmp;
++ uint64_t tmp;
+ u32 Channel;
+
+-
+ tmp = pDCTstat->LogicalCPUID;
+ if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+
+@@ -206,7 +206,7 @@ u32 Modify_D3CMP(struct DCTStatStruc *pDCTstat, u32 dct, u32 value)
+ u32 index_reg;
+ u32 index;
+ u32 val;
+- u32 tmp;
++ uint64_t tmp;
+
+ tmp = pDCTstat->LogicalCPUID;
+ if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+@@ -237,7 +237,7 @@ void SyncSetting(struct DCTStatStruc *pDCTstat)
+ * Silicon Status: Fix TBD
+ */
+
+- u32 tmp;
++ uint64_t tmp;
+ tmp = pDCTstat->LogicalCPUID;
+ if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+ pDCTstat->CH_ODC_CTL[1] = pDCTstat->CH_ODC_CTL[0];
+@@ -278,7 +278,7 @@ u32 CheckNBCOFAutoPrechg(struct DCTStatStruc *pDCTstat, u32 dct)
+
+ void mct_BeforeDramInit_D(struct DCTStatStruc *pDCTstat, u32 dct)
+ {
+- u32 tmp;
++ uint64_t tmp;
+ u32 Speed;
+ u32 ch, ch_start, ch_end;
+ u32 index_reg;
+@@ -286,7 +286,6 @@ void mct_BeforeDramInit_D(struct DCTStatStruc *pDCTstat, u32 dct)
+ u32 dev;
+ u32 val;
+
+-
+ tmp = pDCTstat->LogicalCPUID;
+ if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+ Speed = pDCTstat->Speed;
+@@ -331,7 +330,7 @@ static u8 mct_checkFenceHoleAdjust_D(struct MCTStatStruc *pMCTstat,
+ u8 ChipSel, u8 *result)
+ {
+ u8 ByteLane;
+- u32 tmp;
++ uint64_t tmp;
+
+ tmp = pDCTstat->LogicalCPUID;
+ if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+index 12dfff1..74066b1 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+@@ -75,6 +75,8 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
+ static u16 Get_Fk_D(u8 k);
+ static u8 Get_DIMMAddress_D(struct DCTStatStruc *pDCTstat, u8 i);
++static void mct_preInitDCT(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat);
+ static void mct_initDCT(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+ static void mct_DramInit(struct MCTStatStruc *pMCTstat,
+@@ -105,11 +107,11 @@ static void Get_TrwtTO(struct MCTStatStruc *pMCTstat,
+ static void Get_TrwtWB(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat,
+- u32 dev, u32 index_reg);
++ u32 dev, uint8_t dct, u32 index_reg);
+ static void Get_WrDatGross_Diff(struct DCTStatStruc *pDCTstat, u8 dct,
+ u32 dev, u32 index_reg);
+ static u16 Get_DqsRcvEnGross_MaxMin(struct DCTStatStruc *pDCTstat,
+- u32 dev, u32 index_reg, u32 index);
++ u32 dev, uint8_t dct, u32 index_reg, u32 index);
+ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+ static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat, u8 dct,
+@@ -128,6 +130,8 @@ static void SetCKETriState(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
+ static void SetODTTriState(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
++static void InitDDRPhy(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 dct);
+ static void InitPhyCompensation(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
+ static u32 mct_NodePresent_D(void);
+@@ -138,7 +142,9 @@ static void mct_ResetDataStruct_D(struct MCTStatStruc *pMCTstat,
+ static void mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
+ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat);
++ struct DCTStatStruc *pDCTstat, u8 dct);
++static void mct_ProgramODT_D(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 dct);
+ void mct_ClrClToNB_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+ static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat,
+@@ -158,6 +164,10 @@ static u32 mct_DisDllShutdownSR(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u32 DramConfigLo, u8 dct);
+ static void mct_EnDllShutdownSR(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
++static void ChangeMemClk(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat);
++void SetTargetFreq(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat);
+
+ static u32 mct_MR1Odt_RDimm(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel);
+@@ -165,7 +175,8 @@ static u32 mct_DramTermDyn_RDimm(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dimm);
+ static u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2);
+ static void mct_BeforeDQSTrainSamp(struct DCTStatStruc *pDCTstat);
+-static void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
++static void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstatA, uint8_t Pass);
+ static u8 Get_Latency_Diff(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
+ static void SyncSetting(struct DCTStatStruc *pDCTstat);
+@@ -173,6 +184,12 @@ static u8 crcCheck(u8 smbaddr);
+ static void mct_ExtMCTConfig_Bx(struct DCTStatStruc *pDCTstat);
+ static void mct_ExtMCTConfig_Cx(struct DCTStatStruc *pDCTstat);
+
++static void read_dqs_receiver_enable_control_registers(uint16_t* current_total_delay,
++ uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg);
++
++static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay,
++ uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg);
++
+ /*See mctAutoInitMCT header for index relationships to CL and T*/
+ static const u16 Table_F_k[] = {00,200,266,333,400,533 };
+ static const u8 Tab_BankAddr[] = {0x3F,0x01,0x09,0x3F,0x3F,0x11,0x0A,0x19,0x12,0x1A,0x21,0x22,0x23};
+@@ -223,6 +240,936 @@ static const u8 Table_Comp_Rise_Slew_15x[] = {7, 7, 3, 2, 0xFF};
+ static const u8 Table_Comp_Fall_Slew_20x[] = {7, 5, 3, 2, 0xFF};
+ static const u8 Table_Comp_Fall_Slew_15x[] = {7, 7, 5, 3, 0xFF};
+
++static uint8_t dct_ddr_voltage_index(struct DCTStatStruc *pDCTstat, uint8_t dct)
++{
++ uint8_t dimm;
++ uint8_t ddr_voltage_index = 0;
++
++ /* Find current DDR supply voltage for this DCT */
++ for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm++) {
++ if (pDCTstat->DIMMValidDCT[dct] & (1 << dimm))
++ ddr_voltage_index |= pDCTstat->DimmConfiguredVoltage[dimm];
++ }
++ if (ddr_voltage_index > 0x7) {
++ printk(BIOS_DEBUG, "%s: Insufficient DDR supply voltage indicated! Configuring processor for 1.25V operation, but this attempt may fail...\n", __func__);
++ ddr_voltage_index = 0x4;
++ }
++ if (ddr_voltage_index == 0x0) {
++ printk(BIOS_DEBUG, "%s: No DDR supply voltage indicated! Configuring processor for 1.5V operation, but this attempt may fail...\n", __func__);
++ ddr_voltage_index = 0x1;
++ }
++
++ return ddr_voltage_index;
++}
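++/*
++ * The per-DCT voltage is accumulated as a bitmask, so a DCT whose DIMMs
++ * report 0x1 (1.50V) and 0x2 (1.35V) yields an index of 0x3. The callers
++ * below test bit 2 (1.25V) first, then bit 1 (1.35V), then bit 0 (1.50V),
++ * so the lowest voltage present in the mask selects the calibration tables.
++ */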
++
++static uint16_t fam15h_mhz_to_memclk_config(uint16_t freq)
++{
++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
++ uint16_t iter;
++
++ /* Compute the index value for the given frequency */
++ for (iter = 0; iter <= 0x16; iter++) {
++ if (fam15h_freq_tab[iter] == freq)
++ break;
++ }
++ if (fam15h_freq_tab[iter] == freq)
++ freq = iter;
++ if (freq == 0)
++ freq = 0x4;
++
++ return freq;
++}
++
++static uint16_t fam10h_mhz_to_memclk_config(uint16_t freq)
++{
++ uint16_t fam10h_freq_tab[] = {0, 0, 0, 400, 533, 667, 800};
++ uint16_t iter;
++
++ /* Compute the index value for the given frequency */
++ for (iter = 0; iter <= 0x6; iter++) {
++ if (fam10h_freq_tab[iter] == freq)
++ break;
++ }
++ if (fam10h_freq_tab[iter] == freq)
++ freq = iter;
++ if (freq == 0)
++ freq = 0x3;
++
++ return freq;
++}
++
++static uint16_t mhz_to_memclk_config(uint16_t freq)
++{
++ if (is_fam15h())
++ return fam15h_mhz_to_memclk_config(freq);
++ else
++ return fam10h_mhz_to_memclk_config(freq) + 1;
++}
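++/*
++ * Example mappings (frequencies in MHz): on Fam15h, 800 MHz (DDR3-1600) maps
++ * to the MemClkFreq encoding 0x12 used throughout the tables below, while on
++ * Fam10h the same mhz_to_memclk_config() call returns table index 6 + 1 = 7.
++ */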
++
++static uint32_t fam15h_phy_predriver_calibration_code(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t drive_strength)
++{
++ uint8_t lrdimm = 0;
++ uint8_t package_type;
++ uint8_t ddr_voltage_index;
++ uint32_t calibration_code = 0;
++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++
++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct);
++ package_type = mctGet_NVbits(NV_PACK_TYPE);
++
++ if (!lrdimm) {
++ /* Not an LRDIMM */
++ if ((package_type == PT_M2) || (package_type == PT_GR)) {
++ /* Socket AM3 or G34 */
++ if (ddr_voltage_index & 0x4) {
++ /* 1.25V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 43 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x6db;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xdb6;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x924;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xfff;
++ }
++ }
++ else if (ddr_voltage_index & 0x2) {
++ /* 1.35V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 42 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xdb6;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xbd6;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x6db;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xdb6;
++ }
++ }
++ else if (ddr_voltage_index & 0x1) {
++ /* 1.5V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 41 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x6db;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xb6d;
++ }
++ }
++ }
++ else if (package_type == PT_C3) {
++ /* Socket C32 */
++ if (ddr_voltage_index & 0x4) {
++ /* 1.25V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 46 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x6db;
++ } else if (MemClkFreq == 0xa) {
++ /* DDR3-1066 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xdb6;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x924;
++ } else if (MemClkFreq == 0xe) {
++ /* DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xfff;
++ }
++ }
++ else if (ddr_voltage_index & 0x2) {
++ /* 1.35V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 45 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if (MemClkFreq == 0xa) {
++ /* DDR3-1066 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xdb6;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x6db;
++ } else if (MemClkFreq == 0xe) {
++ /* DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xdb6;
++ }
++ }
++ else if (ddr_voltage_index & 0x1) {
++ /* 1.5V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 44 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if (MemClkFreq == 0xa) {
++ /* DDR3-1066 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x6db;
++ } else if (MemClkFreq == 0xe) {
++ /* DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xfff;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xb6d;
++ }
++ }
++ }
++ } else {
++ /* LRDIMM */
++
++ /* TODO
++ * Implement LRDIMM support
++ * See Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Tables 47 - 49
++ */
++ }
++
++ return calibration_code;
++}
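++/*
++ * Example lookup for fam15h_phy_predriver_calibration_code (socket and speed
++ * assumed): a non-LRDIMM AM3/G34 configuration at 1.5V and DDR3-1333
++ * (MemClkFreq 0xe) with drive strength 0x1 selects the predriver calibration
++ * code 0x924 from BKDG Table 41.
++ */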
++
++static uint32_t fam15h_phy_predriver_cmd_addr_calibration_code(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t drive_strength)
++{
++ uint8_t ddr_voltage_index;
++ uint32_t calibration_code = 0;
++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++
++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct);
++
++ if (ddr_voltage_index & 0x4) {
++ /* 1.25V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 52 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xdad;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xdad;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xb64;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xb64;
++ }
++ }
++ else if (ddr_voltage_index & 0x2) {
++ /* 1.35V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 51 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x6db;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x924;
++ }
++ }
++ else if (ddr_voltage_index & 0x1) {
++ /* 1.5V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 50 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x492;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x492;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x6db;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x6db;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xb6d;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xb6d;
++ }
++ }
++
++ return calibration_code;
++}
++
++static uint32_t fam15h_phy_predriver_clk_calibration_code(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t drive_strength)
++{
++ uint8_t ddr_voltage_index;
++ uint32_t calibration_code = 0;
++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++
++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct);
++
++ if (ddr_voltage_index & 0x4) {
++ /* 1.25V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 55 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xdad;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xdad;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x924;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xff6;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xff6;
++ }
++ }
++ else if (ddr_voltage_index & 0x2) {
++ /* 1.35V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 54 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xdad;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xdad;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x924;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xdad;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xdad;
++ }
++ }
++ else if (ddr_voltage_index & 0x1) {
++ /* 1.5V */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 53 */
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
++ /* DDR3-667 - DDR3-800 */
++ if (drive_strength == 0x0)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x1)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x2)
++ calibration_code = 0x924;
++ else if (drive_strength == 0x3)
++ calibration_code = 0x924;
++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-1066 - DDR3-1333 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xb6d;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (drive_strength == 0x0)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x1)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x2)
++ calibration_code = 0xff6;
++ else if (drive_strength == 0x3)
++ calibration_code = 0xff6;
++ }
++ }
++
++ return calibration_code;
++}
++
++static uint32_t fam15h_output_driver_compensation_code(struct DCTStatStruc *pDCTstat, uint8_t dct)
++{
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ uint8_t package_type;
++ uint32_t calibration_code = 0;
++
++ package_type = mctGet_NVbits(NV_PACK_TYPE);
++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++
++ /* Obtain number of DIMMs on channel */
++ uint8_t dimm_count = pDCTstat->MAdimms[dct];
++ uint8_t rank_count_dimm0;
++ uint8_t rank_count_dimm1;
++
++ if (package_type == PT_GR) {
++ /* Socket G34 */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 73 */
++ if (MaxDimmsInstallable == 1) {
++ if (MemClkFreq == 0x4) {
++ /* DDR3-667 */
++ calibration_code = 0x00112222;
++ }
++ else if (MemClkFreq == 0x6) {
++ /* DDR3-800 */
++ calibration_code = 0x10112222;
++ }
++ else if (MemClkFreq == 0xa) {
++ /* DDR3-1066 */
++ calibration_code = 0x20112222;
++ }
++ else if ((MemClkFreq == 0xe) || (MemClkFreq == 0x12)) {
++ /* DDR3-1333 - DDR3-1600 */
++ calibration_code = 0x30112222;
++ }
++ else if (MemClkFreq == 0x16) {
++ /* DDR3-1866 */
++ calibration_code = 0x30332222;
++ }
++ } else if (MaxDimmsInstallable == 2) {
++ if (dimm_count == 1) {
++ /* 1 DIMM detected */
++ if (MemClkFreq == 0x4) {
++ /* DDR3-667 */
++ calibration_code = 0x00112222;
++ }
++ else if (MemClkFreq == 0x6) {
++ /* DDR3-800 */
++ calibration_code = 0x10112222;
++ }
++ else if (MemClkFreq == 0xa) {
++ /* DDR3-1066 */
++ calibration_code = 0x20112222;
++ }
++ else if ((MemClkFreq == 0xe) || (MemClkFreq == 0x12)) {
++ /* DDR3-1333 - DDR3-1600 */
++ calibration_code = 0x30112222;
++ }
++ } else if (dimm_count == 2) {
++ /* 2 DIMMs detected */
++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[0];
++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++
++ if (MemClkFreq == 0x4) {
++ /* DDR3-667 */
++ calibration_code = 0x10222222;
++ }
++ else if (MemClkFreq == 0x6) {
++ /* DDR3-800 */
++ calibration_code = 0x20222222;
++ }
++ else if (MemClkFreq == 0xa) {
++ /* DDR3-1066 */
++ calibration_code = 0x30222222;
++ }
++ else if (MemClkFreq == 0xe) {
++ /* DDR3-1333 */
++ calibration_code = 0x30222222;
++ }
++ else if (MemClkFreq == 0x12) {
++ /* DDR3-1600 */
++ if ((rank_count_dimm0 == 1) && (rank_count_dimm1 == 1))
++ calibration_code = 0x30222222;
++ else
++ calibration_code = 0x30112222;
++ }
++ }
++ } else if (MaxDimmsInstallable == 3) {
++ /* TODO
++ * 3 DIMM/channel support unimplemented
++ */
++ }
++ } else {
++ /* TODO
++ * Other socket support unimplemented
++ */
++ }
++
++ return calibration_code;
++}
++
++static uint32_t fam15h_address_timing_compensation_code(struct DCTStatStruc *pDCTstat, uint8_t dct)
++{
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ uint8_t package_type;
++ uint32_t calibration_code = 0;
++
++ package_type = mctGet_NVbits(NV_PACK_TYPE);
++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++
++ /* Obtain number of DIMMs on channel */
++ uint8_t dimm_count = pDCTstat->MAdimms[dct];
++ uint8_t rank_count_dimm0;
++ uint8_t rank_count_dimm1;
++
++ if (package_type == PT_GR) {
++ /* Socket G34 */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 73 */
++ if (MaxDimmsInstallable == 1) {
++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++
++ if (MemClkFreq == 0x4) {
++ /* DDR3-667 */
++ if (rank_count_dimm0 == 1)
++ calibration_code = 0x00000000;
++ else
++ calibration_code = 0x003b0000;
++ } else if (MemClkFreq == 0x6) {
++ /* DDR3-800 */
++ if (rank_count_dimm0 == 1)
++ calibration_code = 0x00000000;
++ else
++ calibration_code = 0x003b0000;
++ } else if (MemClkFreq == 0xa) {
++ /* DDR3-1066 */
++ calibration_code = 0x00383837;
++ } else if (MemClkFreq == 0xe) {
++ /* DDR3-1333 */
++ calibration_code = 0x00363635;
++ } else if (MemClkFreq == 0x12) {
++ /* DDR3-1600 */
++ if (rank_count_dimm0 == 1)
++ calibration_code = 0x00353533;
++ else
++ calibration_code = 0x00003533;
++ } else if (MemClkFreq == 0x16) {
++ /* DDR3-1866 */
++ calibration_code = 0x00333330;
++ }
++ } else if (MaxDimmsInstallable == 2) {
++ if (dimm_count == 1) {
++ /* 1 DIMM detected */
++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++
++ if (MemClkFreq == 0x4) {
++ /* DDR3-667 */
++ if (rank_count_dimm0 == 1)
++ calibration_code = 0x00000000;
++ else
++ calibration_code = 0x003b0000;
++ } else if (MemClkFreq == 0x6) {
++ /* DDR3-800 */
++ if (rank_count_dimm0 == 1)
++ calibration_code = 0x00000000;
++ else
++ calibration_code = 0x003b0000;
++ } else if (MemClkFreq == 0xa) {
++ /* DDR3-1066 */
++ calibration_code = 0x00383837;
++ } else if (MemClkFreq == 0xe) {
++ /* DDR3-1333 */
++ calibration_code = 0x00363635;
++ } else if (MemClkFreq == 0x12) {
++ /* DDR3-1600 */
++ if (rank_count_dimm0 == 1)
++ calibration_code = 0x00353533;
++ else
++ calibration_code = 0x00003533;
++ }
++ } else if (dimm_count == 2) {
++ /* 2 DIMMs detected */
++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[0];
++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++
++ if (MemClkFreq == 0x4) {
++ /* DDR3-667 */
++ calibration_code = 0x00390039;
++ } else if (MemClkFreq == 0x6) {
++ /* DDR3-800 */
++ calibration_code = 0x00390039;
++ } else if (MemClkFreq == 0xa) {
++ /* DDR3-1066 */
++ calibration_code = 0x003a3a3a;
++ } else if (MemClkFreq == 0xe) {
++ /* DDR3-1333 */
++ calibration_code = 0x00003939;
++ } else if (MemClkFreq == 0x12) {
++ /* DDR3-1600 */
++ if ((rank_count_dimm0 == 1) && (rank_count_dimm1 == 1))
++ calibration_code = 0x00003738;
++ }
++ }
++ } else if (MaxDimmsInstallable == 3) {
++ /* TODO
++ * 3 DIMM/channel support unimplemented
++ */
++ }
++ } else {
++ /* TODO
++ * Other socket support unimplemented
++ */
++ }
++
++ return calibration_code;
++}
++
++static uint8_t fam15h_slow_access_mode(struct DCTStatStruc *pDCTstat, uint8_t dct)
++{
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ uint8_t package_type;
++ uint8_t slow_access = 0;
++
++ package_type = mctGet_NVbits(NV_PACK_TYPE);
++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++
++ /* Obtain number of DIMMs on channel */
++ uint8_t dimm_count = pDCTstat->MAdimms[dct];
++ uint8_t rank_count_dimm0;
++ uint8_t rank_count_dimm1;
++
++ if (package_type == PT_GR) {
++ /* Socket G34 */
++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 73 */
++ if (MaxDimmsInstallable == 1) {
++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)
++ || (MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-667 - DDR3-1333 */
++ slow_access = 0;
++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) {
++ /* DDR3-1600 - DDR3-1866 */
++ if (rank_count_dimm0 == 1)
++ slow_access = 0;
++ else
++ slow_access = 1;
++ }
++ } else if (MaxDimmsInstallable == 2) {
++ if (dimm_count == 1) {
++ /* 1 DIMM detected */
++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)
++ || (MemClkFreq == 0xa) || (MemClkFreq == 0xe)) {
++ /* DDR3-667 - DDR3-1333 */
++ slow_access = 0;
++ }
++ else if (MemClkFreq == 0x12) {
++ /* DDR3-1600 */
++ if (rank_count_dimm0 == 1)
++ slow_access = 0;
++ else
++ slow_access = 1;
++ }
++ } else if (dimm_count == 2) {
++ /* 2 DIMMs detected */
++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[0];
++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++
++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)
++ || (MemClkFreq == 0xa)) {
++ /* DDR3-667 - DDR3-1066 */
++ slow_access = 0;
++ }
++ else if ((MemClkFreq == 0xe) || (MemClkFreq == 0x12)) {
++ /* DDR3-1333 - DDR3-1600 */
++ slow_access = 1;
++ }
++ }
++ } else if (MaxDimmsInstallable == 3) {
++ /* TODO
++ * 3 DIMM/channel support unimplemented
++ */
++ }
++ } else {
++ /* TODO
++ * Other socket support unimplemented
++ */
++ }
++
++ return slow_access;
++}
++
++static void set_2t_configuration(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 dct)
++{
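++ /* In slow access (2T) mode the address and command busses are driven for two
++ * MEMCLK cycles instead of one, relaxing signal integrity requirements on
++ * heavily loaded channels at a small performance cost.
++ */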
++ uint32_t dev;
++ uint32_t reg;
++ uint32_t dword;
++
++ uint8_t enable_slow_access_mode = 0;
++ dev = pDCTstat->dev_dct;
++
++ if (is_fam15h()) {
++ if (pDCTstat->_2Tmode)
++ enable_slow_access_mode = 1;
++ } else {
++ if (pDCTstat->_2Tmode == 2)
++ enable_slow_access_mode = 1;
++ }
++
++ reg = 0x94; /* DRAM Configuration High */
++ dword = Get_NB32_DCT(dev, dct, reg);
++ if (enable_slow_access_mode)
++ dword |= (0x1 << 20); /* Set 2T CMD mode */
++ else
++ dword &= ~(0x1 << 20); /* Clear 2T CMD mode */
++ Set_NB32_DCT(dev, dct, reg, dword);
++}
++
++static void precise_ndelay_fam15(struct MCTStatStruc *pMCTstat, uint32_t nanoseconds) {
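++ /* Busy-wait for the requested number of nanoseconds by spinning on the time
++ * stamp counter (MSR 0x00000010); pMCTstat->TSCFreq is treated as a frequency
++ * in MHz, so the target cycle count is TSCFreq * nanoseconds / 1000.
++ */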
++ msr_t tsc_msr;
++ uint64_t cycle_count = (((uint64_t)pMCTstat->TSCFreq) * nanoseconds) / 1000;
++ uint64_t start_timestamp;
++ uint64_t current_timestamp;
++
++ tsc_msr = rdmsr(0x00000010);
++ start_timestamp = (((uint64_t)tsc_msr.hi) << 32) | tsc_msr.lo;
++ do {
++ tsc_msr = rdmsr(0x00000010);
++ current_timestamp = (((uint64_t)tsc_msr.hi) << 32) | tsc_msr.lo;
++ } while ((current_timestamp - start_timestamp) < cycle_count);
++}
++
++static void precise_memclk_delay_fam15(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t clocks) {
++ uint16_t memclk_freq;
++ uint32_t delay_ns;
++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
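++ /* fam15h_freq_tab[] maps the MemClkFreq encoding to the MEMCLK rate in MHz;
++ * entries of zero correspond to reserved encodings.
++ */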
++
++ memclk_freq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++
++ delay_ns = (((uint64_t)clocks * 1000) / fam15h_freq_tab[memclk_freq]);
++ precise_ndelay_fam15(pMCTstat, delay_ns);
++}
++
+ static void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstatA)
+ {
+@@ -277,10 +1224,26 @@ static void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
+ restartinit:
+ mctInitMemGPIOs_A_D(); /* Set any required GPIOs*/
+ if (s3resume) {
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_En_Fam15\n");
++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
++ struct DCTStatStruc *pDCTstat;
++ pDCTstat = pDCTstatA + Node;
++
++ mct_ForceNBPState0_En_Fam15(pMCTstat, pDCTstat);
++ }
++
+ #if IS_ENABLED(CONFIG_HAVE_ACPI_RESUME)
+ printk(BIOS_DEBUG, "mctAutoInitMCT_D: Restoring DCT configuration from NVRAM\n");
+ restore_mct_information_from_nvram();
+ #endif
++
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n");
++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
++ struct DCTStatStruc *pDCTstat;
++ pDCTstat = pDCTstatA + Node;
++
++ mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat);
++ }
+ } else {
+ NodesWmem = 0;
+ node_sys_base = 0;
+@@ -297,15 +1260,15 @@ restartinit:
+ pDCTstat->dev_map = PA_MAP(Node);
+ pDCTstat->dev_dct = PA_DCT(Node);
+ pDCTstat->dev_nbmisc = PA_NBMISC(Node);
++ pDCTstat->dev_link = PA_LINK(Node);
++ pDCTstat->dev_nbctl = PA_NBCTL(Node);
+ pDCTstat->NodeSysBase = node_sys_base;
+
+ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_init Node %d\n", Node);
+ mct_init(pMCTstat, pDCTstat);
+ mctNodeIDDebugPort_D();
+ pDCTstat->NodePresent = NodePresent_D(Node);
+- if (pDCTstat->NodePresent) { /* See if Node is there*/
+- printk(BIOS_DEBUG, "mctAutoInitMCT_D: clear_legacy_Mode\n");
+- clear_legacy_Mode(pMCTstat, pDCTstat);
++ if (pDCTstat->NodePresent) {
+ pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node);
+
+ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_InitialMCT_D\n");
+@@ -314,6 +1277,26 @@ restartinit:
+ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mctSMBhub_Init\n");
+ mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/
+
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_preInitDCT\n");
++ mct_preInitDCT(pMCTstat, pDCTstat);
++ }
++ node_sys_base = pDCTstat->NodeSysBase;
++ node_sys_base += (pDCTstat->NodeSysLimit + 2) & ~0x0F;
++ }
++
++#if IS_ENABLED(DIMM_VOLTAGE_SET_SUPPORT)
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: DIMMSetVoltage\n");
++ DIMMSetVoltages(pMCTstat, pDCTstatA); /* Set the DIMM voltages (mainboard specific) */
++#endif
++
++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
++ struct DCTStatStruc *pDCTstat;
++ pDCTstat = pDCTstatA + Node;
++
++ if (pDCTstat->NodePresent) {
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mctSMBhub_Init\n");
++ mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/
++
+ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_initDCT\n");
+ mct_initDCT(pMCTstat, pDCTstat);
+ if (pDCTstat->ErrCode == SC_FatalErr) {
+@@ -321,20 +1304,13 @@ restartinit:
+ } else if (pDCTstat->ErrCode < SC_StopError) {
+ NodesWmem++;
+ }
+- } /* if Node present */
+- node_sys_base = pDCTstat->NodeSysBase;
+- node_sys_base += (pDCTstat->NodeSysLimit + 2) & ~0x0F;
++ }
+ }
+ if (NodesWmem == 0) {
+ printk(BIOS_DEBUG, "No Nodes?!\n");
+ goto fatalexit;
+ }
+
+-#if IS_ENABLED(DIMM_VOLTAGE_SET_SUPPORT)
+- printk(BIOS_DEBUG, "mctAutoInitMCT_D: DIMMSetVoltage\n");
+- DIMMSetVoltages(pMCTstat, pDCTstatA); /* Set the DIMM voltages (mainboard specific) */
+-#endif
+-
+ printk(BIOS_DEBUG, "mctAutoInitMCT_D: SyncDCTsReady_D\n");
+ SyncDCTsReady_D(pMCTstat, pDCTstatA); /* Make sure DCTs are ready for accesses.*/
+
+@@ -355,7 +1331,6 @@ restartinit:
+ printk(BIOS_DEBUG, "mctAutoInitMCT_D: :OtherTiming\n");
+ mct_OtherTiming(pMCTstat, pDCTstatA);
+
+-
+ if (ReconfigureDIMMspare_D(pMCTstat, pDCTstatA)) { /* RESET# if 1st pass of DIMM spare enabled*/
+ goto restartinit;
+ }
+@@ -369,6 +1344,14 @@ restartinit:
+ MCTMemClr_D(pMCTstat,pDCTstatA);
+ }
+
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n");
++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
++ struct DCTStatStruc *pDCTstat;
++ pDCTstat = pDCTstatA + Node;
++
++ mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat);
++ }
++
+ mct_FinalMCT_D(pMCTstat, pDCTstatA);
+ printk(BIOS_DEBUG, "mctAutoInitMCT_D Done: Global Status: %x\n", pMCTstat->GStatus);
+ }
+@@ -408,6 +1391,425 @@ static u8 ReconfigureDIMMspare_D(struct MCTStatStruc *pMCTstat,
+ return ret;
+ }
+
++/* Enable or disable phy-assisted training mode
++ * Phy-assisted training mode applies to the following DRAM training procedures:
++ * Write Levelization Training (2.10.5.8.1)
++ * DQS Receiver Enable Training (2.10.5.8.2)
++ */
++static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t enable)
++{
++ uint8_t index;
++ uint32_t dword;
++ uint32_t index_reg = 0x98;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ if (enable) {
++ /* Enable training mode */
++ dword = Get_NB32_DCT(dev, dct, 0x78); /* DRAM Control */
++ dword &= ~(0x1 << 17); /* AddrCmdTriEn = 0 */
++ Set_NB32_DCT(dev, dct, 0x78, dword); /* DRAM Control */
++
++ dword = Get_NB32_DCT(dev, dct, 0x8c); /* DRAM Timing High */
++ dword |= (0x1 << 18); /* DisAutoRefresh = 1 */
++ Set_NB32_DCT(dev, dct, 0x8c, dword); /* DRAM Timing High */
++
++ dword = Get_NB32_DCT(dev, dct, 0x94); /* DRAM Configuration High */
++ dword &= ~(0xf << 24); /* DcqBypassMax = 0 */
++ dword &= ~(0x1 << 22); /* BankSwizzleMode = 0 */
++ dword &= ~(0x1 << 15); /* PowerDownEn = 0 */
++ dword &= ~(0x3 << 10); /* ZqcsInterval = 0 */
++ Set_NB32_DCT(dev, dct, 0x94, dword); /* DRAM Configuration High */
++
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d);
++ dword &= ~(0xf << 16); /* RxMaxDurDllNoLock = 0 */
++ dword &= ~(0xf); /* TxMaxDurDllNoLock = 0 */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d, dword);
++
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8));
++ dword &= ~(0x1 << 12); /* EnRxPadStandby = 0 */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8), dword);
++ }
++
++ dword = Get_NB32_DCT(dev, dct, 0xa4); /* DRAM Controller Temperature Throttle */
++ dword &= ~(0x1 << 11); /* BwCapEn = 0 */
++ dword &= ~(0x1 << 8); /* ODTSEn = 0 */
++ Set_NB32_DCT(dev, dct, 0xa4, dword); /* DRAM Controller Temperature Throttle */
++
++ dword = Get_NB32_DCT(dev, dct, 0x110); /* DRAM Controller Select Low */
++ dword &= ~(0x1 << 2); /* DctSelIntLvEn = 0 */
++ Set_NB32_DCT(dev, dct, 0x110, dword); /* DRAM Controller Select Low */
++
++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x58); /* Scrub Rate Control */
++ dword &= ~(0x1f << 24); /* L3Scrub = 0 */
++ dword &= ~(0x1f); /* DramScrub = 0 */
++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x58, dword); /* Scrub Rate Control */
++
++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x5c); /* DRAM Scrub Address Low */
++ dword &= ~(0x1); /* ScrubReDirEn = 0 */
++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x5c, dword); /* DRAM Scrub Address Low */
++
++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x1b8); /* L3 Control 1 */
++ dword |= (0x1 << 4); /* L3ScrbRedirDis = 1 */
++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x1b8, dword); /* L3 Control 1 */
++
++ /* Fam15h BKDG section 2.10.5.5.1 */
++ dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */
++ dword &= ~(0xf << 24); /* TrdrdSdSc = 0xb */
++ dword |= (0xb << 24);
++ dword &= ~(0xf << 16); /* TrdrdSdDc = 0xb */
++ dword |= (0xb << 16);
++ dword &= ~(0xf); /* TrdrdDd = 0xb */
++ dword |= 0xb;
++ Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */
++
++ /* Fam15h BKDG section 2.10.5.5.2 */
++ dword = Get_NB32_DCT(dev, dct, 0x214); /* DRAM Timing 4 */
++ dword &= ~(0xf << 16); /* TwrwrSdSc = 0xb */
++ dword |= (0xb << 16);
++ dword &= ~(0xf << 8); /* TwrwrSdDc = 0xb */
++ dword |= (0xb << 8);
++ dword &= ~(0xf); /* TwrwrDd = 0xb */
++ dword |= 0xb;
++ Set_NB32_DCT(dev, dct, 0x214, dword); /* DRAM Timing 4 */
++
++ /* Fam15h BKDG section 2.10.5.5.3 */
++ dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */
++ dword &= ~(0xf << 8); /* Twrrd = 0xb */
++ dword |= (0xb << 8);
++ Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */
++
++ /* Fam15h BKDG section 2.10.5.5.4 */
++ dword = Get_NB32_DCT(dev, dct, 0x21c); /* DRAM Timing 6 */
++ dword &= ~(0x1f << 8); /* TrwtTO = 0x16 */
++ dword |= (0x16 << 8);
++ dword &= ~(0x1f << 16); /* TrwtWB = TrwtTO + 1 */
++ dword |= ((((dword >> 8) & 0x1f) + 1) << 16);
++ Set_NB32_DCT(dev, dct, 0x21c, dword); /* DRAM Timing 6 */
++ } else {
++ /* Disable training mode */
++ uint8_t lane;
++ uint8_t dimm;
++ uint8_t receiver;
++ uint8_t max_lane;
++ uint8_t ecc_enabled;
++ uint8_t x4_present = 0;
++ uint8_t x8_present = 0;
++ uint8_t memclk_index;
++ uint8_t interleave_channels = 0;
++ uint8_t redirect_ecc_scrub = 0;
++ uint16_t trdrdsddc;
++ uint16_t trdrddd;
++ uint16_t cdd_trdrddd;
++ uint16_t twrwrsddc;
++ uint16_t twrwrdd;
++ uint16_t cdd_twrwrdd;
++ uint16_t twrrd;
++ uint16_t trwtto;
++ uint8_t first_dimm;
++ uint16_t delay;
++ uint16_t delay2;
++ uint8_t read_odt_delay;
++ uint8_t write_odt_delay;
++ uint16_t difference;
++ uint16_t current_total_delay_1[MAX_BYTE_LANES];
++ uint16_t current_total_delay_2[MAX_BYTE_LANES];
++
++ /* FIXME
++ * This should be platform configurable
++ */
++ uint8_t dimm_event_l_pin_support = 0;
++
++ ecc_enabled = !!(pMCTstat->GStatus & 1 << GSB_ECCDIMMs);
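++ /* Train 9 byte lanes (8 data + 1 ECC) when ECC DIMMs are present, 8 otherwise */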
++ if (ecc_enabled)
++ max_lane = 9;
++ else
++ max_lane = 8;
++
++ if (pDCTstat->Dimmx4Present & ((dct)?0xaa:0x55))
++ x4_present = 1;
++ if (pDCTstat->Dimmx8Present & ((dct)?0xaa:0x55))
++ x8_present = 1;
++ memclk_index = Get_NB32_DCT(dev, dct, 0x94) & 0x1f;
++
++ if (pDCTstat->DIMMValidDCT[0] && pDCTstat->DIMMValidDCT[1] && mctGet_NVbits(NV_Unganged))
++ interleave_channels = 1;
++
++ if ((pMCTstat->GStatus & 1 << GSB_ECCDIMMs) && mctGet_NVbits(NV_ECCRedir))
++ redirect_ecc_scrub = 1;
++
++ dword = Get_NB32_DCT(dev, dct, 0x240);
++ delay = (dword >> 4) & 0xf;
++ if (delay > 6)
++ read_odt_delay = delay - 6;
++ else
++ read_odt_delay = 0;
++ delay = (dword >> 12) & 0x7;
++ if (delay > 6)
++ write_odt_delay = delay - 6;
++ else
++ write_odt_delay = 0;
++
++ /* TODO:
++ * Adjust trdrdsddc if four-rank DIMMs are installed per
++ * section 2.10.5.5.1 of the Family 15h BKDG.
++ * cdd_trdrdsddc will also need to be calculated in that process.
++ */
++ trdrdsddc = 3;
++
++ /* Calculate the Critical Delay Difference for TrdrdDd */
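++ /* cdd_trdrddd is the largest per-byte-lane difference in DQS receiver enable
++ * delay between the first populated DIMM and any other DIMM on this channel.
++ */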
++ cdd_trdrddd = 0;
++ first_dimm = 1;
++ for (receiver = 0; receiver < 8; receiver += 2) {
++ dimm = (receiver >> 1);
++
++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver))
++ continue;
++
++ read_dqs_receiver_enable_control_registers(current_total_delay_2, dev, dct, dimm, index_reg);
++
++ if (first_dimm) {
++ memcpy(current_total_delay_1, current_total_delay_2, sizeof(current_total_delay_1));
++ first_dimm = 0;
++ }
++
++ for (lane = 0; lane < max_lane; lane++) {
++ if (current_total_delay_1[lane] > current_total_delay_2[lane])
++ difference = current_total_delay_1[lane] - current_total_delay_2[lane];
++ else
++ difference = current_total_delay_2[lane] - current_total_delay_1[lane];
++
++ if (difference > cdd_trdrddd)
++ cdd_trdrddd = difference;
++ }
++ }
++
++ /* Convert the difference to MEMCLKs */
++ cdd_trdrddd = (((cdd_trdrddd >> 5) & 0x1f) + 1) / 2;
++
++ /* Calculate Trdrddd */
++ delay = (read_odt_delay + 3) * 2;
++ delay2 = cdd_trdrddd + 7;
++ if (delay2 > delay)
++ delay = delay2;
++ trdrddd = (delay + 1) / 2; /* + 1 is equivalent to ceiling function here */
++ if (trdrdsddc > trdrddd)
++ trdrddd = trdrdsddc;
++
++ /* TODO:
++ * Adjust twrwrsddc if four-rank DIMMs are installed per
++ * section 2.10.5.5.1 of the Family 15h BKDG.
++ * cdd_twrwrsddc will also need to be calculated in that process.
++ */
++ twrwrsddc = 4;
++
++ /* Calculate the Critical Delay Difference for TwrwrDd */
++ cdd_twrwrdd = 0;
++ first_dimm = 1;
++ for (receiver = 0; receiver < 8; receiver += 2) {
++ dimm = (receiver >> 1);
++
++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver))
++ continue;
++
++ read_dqs_write_timing_control_registers(current_total_delay_2, dev, dct, dimm, index_reg);
++
++ if (first_dimm) {
++ memcpy(current_total_delay_1, current_total_delay_2, sizeof(current_total_delay_1));
++ first_dimm = 0;
++ }
++
++ for (lane = 0; lane < max_lane; lane++) {
++ if (current_total_delay_1[lane] > current_total_delay_2[lane])
++ difference = current_total_delay_1[lane] - current_total_delay_2[lane];
++ else
++ difference = current_total_delay_2[lane] - current_total_delay_1[lane];
++
++ if (difference > cdd_twrwrdd)
++ cdd_twrwrdd = difference;
++ }
++ }
++
++ /* Convert the difference to MEMCLKs */
++ cdd_twrwrdd = (((cdd_twrwrdd >> 5) & 0x1f) + 1) / 2;
++
++ /* Calculate Twrwrdd */
++ delay = (write_odt_delay + 3) * 2;
++ delay2 = cdd_twrwrdd + 7;
++ if (delay2 > delay)
++ delay = delay2;
++ twrwrdd = (delay + 1) / 2; /* + 1 is equivalent to ceiling function here */
++ if (twrwrsddc > twrwrdd)
++ twrwrdd = twrwrsddc;
++
++ dword = Get_NB32_DCT(dev, dct, 0x78); /* DRAM Control */
++ dword |= (0x1 << 17); /* AddrCmdTriEn = 1 */
++ Set_NB32_DCT(dev, dct, 0x78, dword); /* DRAM Control */
++
++ dword = Get_NB32_DCT(dev, dct, 0x8c); /* DRAM Timing High */
++ dword &= ~(0x1 << 18); /* DisAutoRefresh = 0 */
++ Set_NB32_DCT(dev, dct, 0x8c, dword); /* DRAM Timing High */
++
++ dword = Get_NB32_DCT(dev, dct, 0x94); /* DRAM Configuration High */
++ dword |= (0xf << 24); /* DcqBypassMax = 0xf */
++ dword |= (0x1 << 22); /* BankSwizzleMode = 1 */
++ dword |= (0x1 << 15); /* PowerDownEn = 1 */
++ dword &= ~(0x3 << 10); /* ZqcsInterval = 0x2 */
++ dword |= (0x2 << 10);
++ Set_NB32_DCT(dev, dct, 0x94, dword); /* DRAM Configuration High */
++
++ if (x4_present && x8_present) {
++ /* Mixed channel of 4x and 8x DIMMs */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d);
++ dword &= ~(0x3 << 24); /* RxDLLWakeupTime = 0 */
++ dword &= ~(0x7 << 20); /* RxCPUpdPeriod = 0 */
++ dword &= ~(0xf << 16); /* RxMaxDurDllNoLock = 0 */
++ dword &= ~(0x3 << 8); /* TxDLLWakeupTime = 0 */
++ dword &= ~(0x7 << 4); /* TxCPUpdPeriod = 0 */
++ dword &= ~(0xf); /* TxMaxDurDllNoLock = 0 */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d, dword);
++ } else {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d);
++ dword &= ~(0x3 << 24); /* RxDLLWakeupTime = 3 */
++ dword |= (0x3 << 24);
++ dword &= ~(0x7 << 20); /* RxCPUpdPeriod = 3 */
++ dword |= (0x3 << 20);
++ dword &= ~(0xf << 16); /* RxMaxDurDllNoLock = 7 */
++ dword |= (0x7 << 16);
++ dword &= ~(0x3 << 8); /* TxDLLWakeupTime = 3 */
++ dword |= (0x3 << 8);
++ dword &= ~(0x7 << 4); /* TxCPUpdPeriod = 3 */
++ dword |= (0x3 << 4);
++ dword &= ~(0xf); /* TxMaxDurDllNoLock = 7 */
++ dword |= 0x7;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d, dword);
++ }
++
++ if ((memclk_index <= 0x12) && (x4_present != x8_present)) {
++ /* MemClkFreq <= 800MHz
++ * Not a mixed channel of x4 and x8 DIMMs
++ */
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8));
++ dword |= (0x1 << 12); /* EnRxPadStandby = 1 */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8), dword);
++ }
++ } else {
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8));
++ dword &= ~(0x1 << 12); /* EnRxPadStandby = 0 */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8), dword);
++ }
++ }
++
++ /* TODO
++ * Calculate Twrrd per section 2.10.5.5.3 of the Family 15h BKDG
++ */
++ twrrd = 0xb;
++
++ /* TODO
++ * Calculate TrwtTO per section 2.10.5.5.4 of the Family 15h BKDG
++ */
++ trwtto = 0x16;
++
++ dword = Get_NB32_DCT(dev, dct, 0xa4); /* DRAM Controller Temperature Throttle */
++ dword &= ~(0x1 << 11); /* BwCapEn = 0 */
++ dword &= ~(0x1 << 8); /* ODTSEn = dimm_event_l_pin_support */
++ dword |= (dimm_event_l_pin_support & 0x1) << 8;
++ Set_NB32_DCT(dev, dct, 0xa4, dword); /* DRAM Controller Temperature Throttle */
++
++ dword = Get_NB32_DCT(dev, dct, 0x110); /* DRAM Controller Select Low */
++ dword &= ~(0x1 << 2); /* DctSelIntLvEn = interleave_channels */
++ dword |= (interleave_channels & 0x1) << 2;
++ Set_NB32_DCT(dev, dct, 0x110, dword); /* DRAM Controller Select Low */
++
++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x58); /* Scrub Rate Control */
++ dword &= ~(0x1f << 24); /* L3Scrub = NV_L3BKScrub */
++ dword |= (mctGet_NVbits(NV_L3BKScrub) & 0x1f) << 24;
++ dword &= ~(0x1f); /* DramScrub = NV_DramBKScrub */
++ dword |= mctGet_NVbits(NV_DramBKScrub) & 0x1f;
++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x58, dword); /* Scrub Rate Control */
++
++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x5c); /* DRAM Scrub Address Low */
++ dword &= ~(0x1); /* ScrubReDirEn = redirect_ecc_scrub */
++ dword |= redirect_ecc_scrub & 0x1;
++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x5c, dword); /* DRAM Scrub Address Low */
++
++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x1b8); /* L3 Control 1 */
++ dword &= ~(0x1 << 4); /* L3ScrbRedirDis = 0 */
++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x1b8, dword); /* L3 Control 1 */
++
++ /* FIXME
++ * The BKDG-recommended settings cause memory corruption on the ASUS KGPE-D16.
++ * Investigate and fix...
++ */
++#if 0
++ /* Fam15h BKDG section 2.10.5.5.1 */
++ dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */
++ dword &= ~(0xf << 24); /* TrdrdSdSc = 0x1 */
++ dword |= (0x1 << 24);
++ dword &= ~(0xf << 16); /* TrdrdSdDc = trdrdsddc */
++ dword |= ((trdrdsddc & 0xf) << 16);
++ dword &= ~(0xf); /* TrdrdDd = trdrddd */
++ dword |= (trdrddd & 0xf);
++ Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */
++#endif
++
++ /* Fam15h BKDG section 2.10.5.5.2 */
++ dword = Get_NB32_DCT(dev, dct, 0x214); /* DRAM Timing 4 */
++ dword &= ~(0xf << 16); /* TwrwrSdSc = 0x1 */
++ dword |= (0x1 << 16);
++ dword &= ~(0xf << 8); /* TwrwrSdDc = twrwrsddc */
++ dword |= ((twrwrsddc & 0xf) << 8);
++ dword &= ~(0xf); /* TwrwrDd = twrwrdd */
++ dword |= (twrwrdd & 0xf);
++ Set_NB32_DCT(dev, dct, 0x214, dword); /* DRAM Timing 4 */
++
++ /* Fam15h BKDG section 2.10.5.5.3 */
++ dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */
++ dword &= ~(0xf << 8); /* Twrrd = twrrd */
++ dword |= ((twrrd & 0xf) << 8);
++ Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */
++
++ /* Fam15h BKDG section 2.10.5.5.4 */
++ dword = Get_NB32_DCT(dev, dct, 0x21c); /* DRAM Timing 6 */
++ dword &= ~(0x1f << 8); /* TrwtTO = trwtto */
++ dword |= ((trwtto & 0x1f) << 8);
++ dword &= ~(0x1f << 16); /* TrwtWB = TrwtTO + 1 */
++ dword |= ((((dword >> 8) & 0x1f) + 1) << 16);
++ Set_NB32_DCT(dev, dct, 0x21c, dword); /* DRAM Timing 6 */
++
++ /* Enable prefetchers */
++ dword = Get_NB32_DCT(dev, dct, 0x110); /* Memory Controller Configuration High */
++ dword &= ~(0x1 << 13); /* PrefIoDis = 0 */
++ dword &= ~(0x1 << 12); /* PrefCpuDis = 0 */
++ Set_NB32_DCT(dev, dct, 0x110, dword); /* Memory Controller Configuration High */
++ }
++}
++
++static void exit_training_mode_fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstatA)
++{
++ uint8_t node;
++ uint8_t dct;
++
++ for (node = 0; node < MAX_NODES_SUPPORTED; node++) {
++ struct DCTStatStruc *pDCTstat;
++ pDCTstat = pDCTstatA + node;
++
++ if (pDCTstat->NodePresent)
++ for (dct = 0; dct < 2; dct++)
++ fam15EnableTrainingMode(pMCTstat, pDCTstat, dct, 0);
++ }
++}
++
+ static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstatA)
+ {
+@@ -424,6 +1826,20 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
+ mct_BeforeDQSTrain_D(pMCTstat, pDCTstatA);
+ phyAssistedMemFnceTraining(pMCTstat, pDCTstatA);
+
++ if (is_fam15h()) {
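++ /* On Fam15h, apply the DDR phy compensation settings to every enabled DCT
++ * before any DQS training is started.
++ */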
++ uint8_t Node;
++ struct DCTStatStruc *pDCTstat;
++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
++ pDCTstat = pDCTstatA + Node;
++ if (pDCTstat->NodePresent) {
++ if (pDCTstat->DIMMValidDCT[0])
++ InitPhyCompensation(pMCTstat, pDCTstat, 0);
++ if (pDCTstat->DIMMValidDCT[1])
++ InitPhyCompensation(pMCTstat, pDCTstat, 1);
++ }
++ }
++ }
++
+ if (nv_DQSTrainCTL) {
+ mctHookBeforeAnyTraining(pMCTstat, pDCTstatA);
+ /* TODO: should be in mctHookBeforeAnyTraining */
+@@ -431,16 +1847,35 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
+ _WRMSR(0x26D, 0x04040404, 0x04040404);
+ _WRMSR(0x26E, 0x04040404, 0x04040404);
+ _WRMSR(0x26F, 0x04040404, 0x04040404);
+- mct_WriteLevelization_HW(pMCTstat, pDCTstatA);
++ mct_WriteLevelization_HW(pMCTstat, pDCTstatA, FirstPass);
+
+- TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass);
++ if (is_fam15h()) {
++ /* Receiver Enable Training Pass 1 */
++ TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass);
++ }
+
+- mct_TrainDQSPos_D(pMCTstat, pDCTstatA);
++ mct_WriteLevelization_HW(pMCTstat, pDCTstatA, SecondPass);
++
++ if (is_fam15h()) {
++ /* Receiver Enable Training Pass 2 */
++ // TrainReceiverEn_D(pMCTstat, pDCTstatA, SecondPass);
++
++ /* TODO:
++ * Determine why running TrainReceiverEn_D in SecondPass
++ * mode yields less stable training values than when run
++ * in FirstPass mode as in the HACK below.
++ */
++ TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass);
++ } else {
++ TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass);
++ }
+
+- /* Second Pass never used for Barcelona! */
+- /* TrainReceiverEn_D(pMCTstat, pDCTstatA, SecondPass); */
++ mct_TrainDQSPos_D(pMCTstat, pDCTstatA);
+
+- mctSetEccDQSRcvrEn_D(pMCTstat, pDCTstatA);
++ if (is_fam15h())
++ exit_training_mode_fam15(pMCTstat, pDCTstatA);
++ else
++ mctSetEccDQSRcvrEn_D(pMCTstat, pDCTstatA);
+
+ /* FIXME - currently uses calculated value TrainMaxReadLatency_D(pMCTstat, pDCTstatA); */
+ mctHookAfterAnyTraining();
+@@ -476,7 +1911,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat,
+ for (Channel = 0;Channel < 2; Channel++) {
+ /* there are four receiver pairs,
+ loosely associated with chipselects.*/
+- index_reg = 0x98 + Channel * 0x100;
++ index_reg = 0x98;
+ for (Receiver = 0; Receiver < 8; Receiver += 2) {
+ /* Set Receiver Enable Values */
+ mct_SetRcvrEnDly_D(pDCTstat,
+@@ -492,7 +1927,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat,
+ txdqs = pDCTstat->CH_D_B_TxDqs[Channel][Receiver >> 1][ByteLane];
+ index = Table_DQSRcvEn_Offset[ByteLane >> 1];
+ index += (Receiver >> 1) * 3 + 0x10 + 0x20; /* Addl_Index */
+- val = Get_NB32_index_wait(dev, 0x98 + 0x100*Channel, index);
++ val = Get_NB32_index_wait_DCT(dev, Channel, 0x98, index);
+ if (ByteLane & 1) { /* odd byte lane */
+ val &= ~(0xFF << 16);
+ val |= txdqs << 16;
+@@ -500,7 +1935,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat,
+ val &= ~0xFF;
+ val |= txdqs;
+ }
+- Set_NB32_index_wait(dev, 0x98 + 0x100*Channel, index, val);
++ Set_NB32_index_wait_DCT(dev, Channel, 0x98, index, val);
+ }
+ }
+ }
+@@ -510,7 +1945,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat,
+
+ for (Channel = 0; Channel < 2; Channel++) {
+ u8 *p;
+- index_reg = 0x98 + Channel * 0x100;
++ index_reg = 0x98;
+
+ /* NOTE:
+ * when 400, 533, 667, it will support dimm0/1/2/3,
+@@ -525,7 +1960,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat,
+ if (DIMM == 0) {
+ index = 0; /* CHA Write Data Timing Low */
+ } else {
+- if (pDCTstat->Speed >= 4) {
++ if (pDCTstat->Speed >= mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) {
+ index = 0x100 * DIMM;
+ } else {
+ break;
+@@ -534,23 +1969,23 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat,
+ for (Dir = 0; Dir < 2; Dir++) {/* RD/WR */
+ p = pDCTstat->CH_D_DIR_B_DQS[Channel][DIMM][Dir];
+ val = stream_to_int(p); /* CHA Read Data Timing High */
+- Set_NB32_index_wait(dev, index_reg, index+1, val);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index+1, val);
+ val = stream_to_int(p+4); /* CHA Write Data Timing High */
+- Set_NB32_index_wait(dev, index_reg, index+2, val);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index+2, val);
+ val = *(p+8); /* CHA Write ECC Timing */
+- Set_NB32_index_wait(dev, index_reg, index+3, val);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index+3, val);
+ index += 4;
+ }
+ }
+ }
+
+ for (Channel = 0; Channel<2; Channel++) {
+- reg = 0x78 + Channel * 0x100;
+- val = Get_NB32(dev, reg);
++ reg = 0x78;
++ val = Get_NB32_DCT(dev, Channel, reg);
+ val &= ~(0x3ff<<22);
+ val |= ((u32) pDCTstat->CH_MaxRdLat[Channel] << 22);
+ val &= ~(1<<DqsRcvEnTrain);
+- Set_NB32(dev, reg, val); /* program MaxRdLatency to correspond with current delay*/
++ Set_NB32_DCT(dev, Channel, reg, val); /* program MaxRdLatency to correspond with current delay*/
+ }
+ }
+ }
+@@ -812,49 +2247,70 @@ finish:
+ return ret;
+ }
+
+-static void DCTInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct)
++static void DCTPreInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+ /*
+- * Initialize DRAM on single Athlon 64/Opteron Node.
++ * Run DCT pre-initialization tasks
+ */
+- u8 stopDCTflag;
+- u32 val;
++ uint32_t dword;
+
++ /* Reset DCT registers */
+ ClearDCT_D(pMCTstat, pDCTstat, dct);
+- stopDCTflag = 1; /*preload flag with 'disable' */
+- /* enable DDR3 support */
+- val = Get_NB32(pDCTstat->dev_dct, 0x94 + dct * 0x100);
+- val |= 1 << Ddr3Mode;
+- Set_NB32(pDCTstat->dev_dct, 0x94 + dct * 0x100, val);
++ pDCTstat->stopDCT = 1; /*preload flag with 'disable' */
++
++ if (!is_fam15h()) {
++ /* Enable DDR3 support */
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94);
++ dword |= 1 << Ddr3Mode;
++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x94, dword);
++ }
++
++ /* Read the SPD information into the data structures */
+ if (mct_DIMMPresence(pMCTstat, pDCTstat, dct) < SC_StopError) {
+ printk(BIOS_DEBUG, "\t\tDCTInit_D: mct_DIMMPresence Done\n");
+- if (mct_SPDCalcWidth(pMCTstat, pDCTstat, dct) < SC_StopError) {
+- printk(BIOS_DEBUG, "\t\tDCTInit_D: mct_SPDCalcWidth Done\n");
+- if (AutoCycTiming_D(pMCTstat, pDCTstat, dct) < SC_StopError) {
+- printk(BIOS_DEBUG, "\t\tDCTInit_D: AutoCycTiming_D Done\n");
+- if (AutoConfig_D(pMCTstat, pDCTstat, dct) < SC_StopError) {
+- printk(BIOS_DEBUG, "\t\tDCTInit_D: AutoConfig_D Done\n");
+- if (PlatformSpec_D(pMCTstat, pDCTstat, dct) < SC_StopError) {
+- printk(BIOS_DEBUG, "\t\tDCTInit_D: PlatformSpec_D Done\n");
+- stopDCTflag = 0;
+- if (!(pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW))) {
+- printk(BIOS_DEBUG, "\t\tDCTInit_D: StartupDCT_D\n");
+- StartupDCT_D(pMCTstat, pDCTstat, dct); /*yeaahhh! */
+- }
++ }
++}
++
++static void DCTInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct)
++{
++ /*
++ * Initialize DRAM on single Athlon 64/Opteron Node.
++ */
++ uint32_t dword;
++
++ if (!is_fam15h()) {
++ /* (Re)-enable DDR3 support */
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94);
++ dword |= 1 << Ddr3Mode;
++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x94, dword);
++ }
++
++ if (mct_SPDCalcWidth(pMCTstat, pDCTstat, dct) < SC_StopError) {
++ printk(BIOS_DEBUG, "\t\tDCTInit_D: mct_SPDCalcWidth Done\n");
++ if (AutoCycTiming_D(pMCTstat, pDCTstat, dct) < SC_StopError) {
++ printk(BIOS_DEBUG, "\t\tDCTInit_D: AutoCycTiming_D Done\n");
++ if (AutoConfig_D(pMCTstat, pDCTstat, dct) < SC_StopError) {
++ printk(BIOS_DEBUG, "\t\tDCTInit_D: AutoConfig_D Done\n");
++ if (PlatformSpec_D(pMCTstat, pDCTstat, dct) < SC_StopError) {
++ printk(BIOS_DEBUG, "\t\tDCTInit_D: PlatformSpec_D Done\n");
++ pDCTstat->stopDCT = 0;
++ if (!(pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW))) {
++ printk(BIOS_DEBUG, "\t\tDCTInit_D: StartupDCT_D\n");
++ StartupDCT_D(pMCTstat, pDCTstat, dct); /*yeaahhh! */
+ }
+ }
+ }
+ }
+ }
+
+- if (stopDCTflag) {
+- u32 reg_off = dct * 0x100;
+- val = 1<<DisDramInterface;
+- Set_NB32(pDCTstat->dev_dct, reg_off+0x94, val);
+- /*To maximize power savings when DisDramInterface=1b,
+- all of the MemClkDis bits should also be set.*/
+- val = 0xFF000000;
+- Set_NB32(pDCTstat->dev_dct, reg_off+0x88, val);
++ if (pDCTstat->stopDCT) {
++ dword = 1 << DisDramInterface;
++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x94, dword);
++
++ /* To maximize power savings when DisDramInterface=1b,
++ * all of the MemClkDis bits should also be set.
++ */
++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x88, 0xff000000);
+ } else {
+ mct_EnDllShutdownSR(pMCTstat, pDCTstat, dct);
+ }
+@@ -876,20 +2332,24 @@ static void SyncDCTsReady_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat = pDCTstatA + Node;
+ mct_SyncDCTsReady(pDCTstat);
+ }
+- /* v6.1.3 */
+- /* re-enable phy compensation engine when dram init is completed on all nodes. */
+- for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+- struct DCTStatStruc *pDCTstat;
+- pDCTstat = pDCTstatA + Node;
+- if (pDCTstat->NodePresent) {
+- if (pDCTstat->DIMMValidDCT[0] > 0 || pDCTstat->DIMMValidDCT[1] > 0) {
+- /* re-enable phy compensation engine when dram init on both DCTs is completed. */
+- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98, 0x8);
+- val &= ~(1 << DisAutoComp);
+- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98, 0x8, val);
++
++ if (!is_fam15h()) {
++ /* v6.1.3 */
++ /* re-enable phy compensation engine when dram init is completed on all nodes. */
++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
++ struct DCTStatStruc *pDCTstat;
++ pDCTstat = pDCTstatA + Node;
++ if (pDCTstat->NodePresent) {
++ if (pDCTstat->DIMMValidDCT[0] > 0 || pDCTstat->DIMMValidDCT[1] > 0) {
++ /* re-enable phy compensation engine when dram init on both DCTs is completed. */
++ val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98, 0x8);
++ val &= ~(1 << DisAutoComp);
++ Set_NB32_index_wait(pDCTstat->dev_dct, 0x98, 0x8, val);
++ }
+ }
+ }
+ }
++
+ /* wait 750us before any memory access can be made. */
+ mct_Wait(15000);
+ }
+@@ -911,10 +2371,9 @@ static void StartupDCT_D(struct MCTStatStruc *pMCTstat,
+ */
+ u32 val;
+ u32 dev;
+- u32 reg_off = dct * 0x100;
+
+ dev = pDCTstat->dev_dct;
+- val = Get_NB32(dev, 0x94 + reg_off);
++ val = Get_NB32_DCT(dev, dct, 0x94);
+ if (val & (1<<MemClkFreqVal)) {
+ mctHookBeforeDramInit(); /* generalized Hook */
+ if (!(pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)))
+@@ -929,23 +2388,23 @@ static void ClearDCT_D(struct MCTStatStruc *pMCTstat,
+ {
+ u32 reg_end;
+ u32 dev = pDCTstat->dev_dct;
+- u32 reg = 0x40 + 0x100 * dct;
++ u32 reg = 0x40;
+ u32 val = 0;
+
+ if (pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)) {
+- reg_end = 0x78 + 0x100 * dct;
++ reg_end = 0x78;
+ } else {
+- reg_end = 0xA4 + 0x100 * dct;
++ reg_end = 0xA4;
+ }
+
+ while(reg < reg_end) {
+ if ((reg & 0xFF) == 0x90) {
+ if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
+- val = Get_NB32(dev, reg); /* get DRAMConfigLow */
++ val = Get_NB32_DCT(dev, dct, reg); /* get DRAMConfigLow */
+ val |= 0x08000000; /* preserve value of DisDllShutdownSR for only Rev.D */
+ }
+ }
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, dct, reg, val);
+ val = 0;
+ reg += 4;
+ }
+@@ -964,6 +2423,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
+ u16 Trp, Trrd, Trcd, Tras, Trc;
+ u8 Trfc[4];
+ u16 Tfaw;
++ u16 Tcwl; /* Fam15h only */
+ u32 DramTimingLo, DramTimingHi;
+ u8 tCK16x;
+ u16 Twtr;
+@@ -972,10 +2432,11 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
+ u8 byte;
+ u32 dword;
+ u32 dev;
+- u32 reg_off;
+ u32 val;
+ u16 smbaddr;
+
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
++
+ /* Gather all DIMM mini-max values for cycle timing data */
+ Trp = 0;
+ Trrd = 0;
+@@ -1188,88 +2649,164 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
+
+ mctAdjustAutoCycTmg_D();
+
++ if (is_fam15h()) {
++ /* Compute Tcwl (Fam15h BKDG v3.14 Table 203) */
++ if (pDCTstat->Speed <= 0x6)
++ Tcwl = 0x5;
++ else if (pDCTstat->Speed == 0xa)
++ Tcwl = 0x6;
++ else if (pDCTstat->Speed == 0xe)
++ Tcwl = 0x7;
++ else if (pDCTstat->Speed == 0x12)
++ Tcwl = 0x8;
++ else if (pDCTstat->Speed == 0x16)
++ Tcwl = 0x9;
++ else
++ Tcwl = 0x5; /* Power-on default */
++ }
++
+ /* Program DRAM Timing values */
+- DramTimingLo = 0; /* Dram Timing Low init */
+- val = pDCTstat->CASL - 4; /* pDCTstat.CASL to reg. definition */
+- DramTimingLo |= val;
++ if (is_fam15h()) {
++ dev = pDCTstat->dev_dct;
+
+- val = pDCTstat->Trcd - Bias_TrcdT;
+- DramTimingLo |= val<<4;
++ dword = Get_NB32_DCT(dev, dct, 0x8c); /* DRAM Timing High */
++ val = 2; /* Tref = 7.8us */
++ dword &= ~(0x3 << 16);
++ dword |= (val & 0x3) << 16;
++ Set_NB32_DCT(dev, dct, 0x8c, dword); /* DRAM Timing High */
++
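++ /* Program the Fam15h DRAM timing register set (0x200 and up); the fixed
++ * offsets added below convert the internally stored timing values to the
++ * register field encodings.
++ */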
++ dword = Get_NB32_DCT(dev, dct, 0x200); /* DRAM Timing 0 */
++ dword &= ~(0x3f1f1f1f);
++ dword |= ((pDCTstat->Tras + 0xf) & 0x3f) << 24; /* Tras */
++ dword |= ((pDCTstat->Trp + 0x5) & 0x1f) << 16; /* Trp */
++ dword |= ((pDCTstat->Trcd + 0x5) & 0x1f) << 8; /* Trcd */
++ dword |= (pDCTstat->CASL & 0x1f); /* Tcl */
++ Set_NB32_DCT(dev, dct, 0x200, dword); /* DRAM Timing 0 */
++
++ dword = Get_NB32_DCT(dev, dct, 0x204); /* DRAM Timing 1 */
++ dword &= ~(0x0f3f0f3f);
++ dword |= ((pDCTstat->Trtp + 0x4) & 0xf) << 24; /* Trtp */
++ if (pDCTstat->Tfaw != 0)
++ dword |= ((((pDCTstat->Tfaw - 0x1) * 2) + 0x10) & 0x3f) << 16; /* FourActWindow */
++ dword |= ((pDCTstat->Trrd + 0x4) & 0xf) << 8; /* Trrd */
++ dword |= ((pDCTstat->Trc + 0xb) & 0x3f); /* Trc */
++ Set_NB32_DCT(dev, dct, 0x204, dword); /* DRAM Timing 1 */
++
++ dword = Get_NB32_DCT(dev, dct, 0x208); /* DRAM Timing 2 */
++ dword &= ~(0x07070707);
++ dword |= (pDCTstat->Trfc[3] & 0x7) << 24; /* Trfc3 */
++ dword |= (pDCTstat->Trfc[2] & 0x7) << 16; /* Trfc2 */
++ dword |= (pDCTstat->Trfc[1] & 0x7) << 8; /* Trfc1 */
++ dword |= (pDCTstat->Trfc[0] & 0x7); /* Trfc0 */
++ Set_NB32_DCT(dev, dct, 0x208, dword); /* DRAM Timing 2 */
++
++ dword = Get_NB32_DCT(dev, dct, 0x20c); /* DRAM Timing 3 */
++ dword &= ~(0x00000f00);
++ dword |= ((pDCTstat->Twtr + 0x4) & 0xf) << 8; /* Twtr */
++ dword &= ~(0x0000001f);
++ dword |= (Tcwl & 0x1f); /* Tcwl */
++ Set_NB32_DCT(dev, dct, 0x20c, dword); /* DRAM Timing 3 */
++
++ dword = Get_NB32_DCT(dev, dct, 0x22c); /* DRAM Timing 10 */
++ dword &= ~(0x0000001f);
++ dword |= ((pDCTstat->Twr + 0x4) & 0x1f); /* Twr */
++ Set_NB32_DCT(dev, dct, 0x22c, dword); /* DRAM Timing 10 */
++
++ if (pDCTstat->Speed > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) {
++ /* Enable phy-assisted training mode */
++ fam15EnableTrainingMode(pMCTstat, pDCTstat, dct, 1);
++ }
+
+- val = pDCTstat->Trp - Bias_TrpT;
+- val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val);
+- DramTimingLo |= val<<7;
++ /* Other setup (not training specific) */
++ dword = Get_NB32_DCT(dev, dct, 0x90); /* DRAM Configuration Low */
++ dword &= ~(0x1 << 23); /* ForceAutoPchg = 0 */
++ dword &= ~(0x1 << 20); /* DynPageCloseEn = 0 */
++ Set_NB32_DCT(dev, dct, 0x90, dword); /* DRAM Configuration Low */
+
+- val = pDCTstat->Trtp - Bias_TrtpT;
+- DramTimingLo |= val<<10;
++ Set_NB32_DCT(dev, dct, 0x228, 0x14141414); /* DRAM Timing 9 */
++ } else {
++ DramTimingLo = 0; /* Dram Timing Low init */
++ val = pDCTstat->CASL - 4; /* pDCTstat.CASL to reg. definition */
++ DramTimingLo |= val;
+
+- val = pDCTstat->Tras - Bias_TrasT;
+- DramTimingLo |= val<<12;
++ val = pDCTstat->Trcd - Bias_TrcdT;
++ DramTimingLo |= val<<4;
+
+- val = pDCTstat->Trc - Bias_TrcT;
+- DramTimingLo |= val<<16;
++ val = pDCTstat->Trp - Bias_TrpT;
++ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val);
++ DramTimingLo |= val<<7;
+
+- val = pDCTstat->Trrd - Bias_TrrdT;
+- DramTimingLo |= val<<22;
++ val = pDCTstat->Trtp - Bias_TrtpT;
++ DramTimingLo |= val<<10;
+
+- DramTimingHi = 0; /* Dram Timing High init */
+- val = pDCTstat->Twtr - Bias_TwtrT;
+- DramTimingHi |= val<<8;
++ val = pDCTstat->Tras - Bias_TrasT;
++ DramTimingLo |= val<<12;
+
+- val = 2;
+- DramTimingHi |= val<<16;
++ val = pDCTstat->Trc - Bias_TrcT;
++ DramTimingLo |= val<<16;
+
+- val = 0;
+- for (i=4;i>0;i--) {
+- val <<= 3;
+- val |= Trfc[i-1];
+- }
+- DramTimingHi |= val << 20;
++ val = pDCTstat->Trrd - Bias_TrrdT;
++ DramTimingLo |= val<<22;
+
+- dev = pDCTstat->dev_dct;
+- reg_off = 0x100 * dct;
+- /* Twr */
+- val = pDCTstat->Twr;
+- if (val == 10)
+- val = 9;
+- else if (val == 12)
+- val = 10;
+- val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val);
+- val -= Bias_TwrT;
+- val <<= 4;
+- dword = Get_NB32(dev, 0x84 + reg_off);
+- dword &= ~0x70;
+- dword |= val;
+- Set_NB32(dev, 0x84 + reg_off, dword);
++ DramTimingHi = 0; /* Dram Timing High init */
++ val = pDCTstat->Twtr - Bias_TwtrT;
++ DramTimingHi |= val<<8;
+
+- /* Tfaw */
+- val = pDCTstat->Tfaw;
+- val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val);
+- val -= Bias_TfawT;
+- val >>= 1;
+- val <<= 28;
+- dword = Get_NB32(dev, 0x94 + reg_off);
+- dword &= ~0xf0000000;
+- dword |= val;
+- Set_NB32(dev, 0x94 + reg_off, dword);
+-
+- /* dev = pDCTstat->dev_dct; */
+- /* reg_off = 0x100 * dct; */
+-
+- if (pDCTstat->Speed > 4) {
+- val = Get_NB32(dev, 0x88 + reg_off);
+- val &= 0xFF000000;
+- DramTimingLo |= val;
+- }
+- Set_NB32(dev, 0x88 + reg_off, DramTimingLo); /*DCT Timing Low*/
++ val = 2; /* Tref = 7.8us */
++ DramTimingHi |= val<<16;
++
++ val = 0;
++ for (i=4;i>0;i--) {
++ val <<= 3;
++ val |= Trfc[i-1];
++ }
++ DramTimingHi |= val << 20;
++
++ dev = pDCTstat->dev_dct;
++ /* Twr */
++ val = pDCTstat->Twr;
++ if (val == 10)
++ val = 9;
++ else if (val == 12)
++ val = 10;
++ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val);
++ val -= Bias_TwrT;
++ val <<= 4;
++ dword = Get_NB32_DCT(dev, dct, 0x84);
++ dword &= ~0x70;
++ dword |= val;
++ Set_NB32_DCT(dev, dct, 0x84, dword);
++
++ /* Tfaw */
++ val = pDCTstat->Tfaw;
++ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val);
++ val -= Bias_TfawT;
++ val >>= 1;
++ val <<= 28;
++ dword = Get_NB32_DCT(dev, dct, 0x94);
++ dword &= ~0xf0000000;
++ dword |= val;
++ Set_NB32_DCT(dev, dct, 0x94, dword);
++
++ /* dev = pDCTstat->dev_dct; */
++
++ if (pDCTstat->Speed > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) {
++ val = Get_NB32_DCT(dev, dct, 0x88);
++ val &= 0xFF000000;
++ DramTimingLo |= val;
++ }
++ Set_NB32_DCT(dev, dct, 0x88, DramTimingLo); /*DCT Timing Low*/
+
+- if (pDCTstat->Speed > 4) {
+- DramTimingHi |= 1 << DisAutoRefresh;
++ if (pDCTstat->Speed > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) {
++ DramTimingHi |= 1 << DisAutoRefresh;
++ }
++ DramTimingHi |= 0x000018FF;
++ Set_NB32_DCT(dev, dct, 0x8c, DramTimingHi); /*DCT Timing Hi*/
+ }
+- DramTimingHi |= 0x000018FF;
+- Set_NB32(dev, 0x8c + reg_off, DramTimingHi); /*DCT Timing Hi*/
+
+ /* dump_pci_device(PCI_DEV(0, 0x18+pDCTstat->Node_ID, 2)); */
++
++ printk(BIOS_DEBUG, "%s: Done\n", __func__);
+ }
+
+ static u8 AutoCycTiming_D(struct MCTStatStruc *pMCTstat,
+@@ -1303,6 +2840,8 @@ static u8 AutoCycTiming_D(struct MCTStatStruc *pMCTstat,
+ * timing mode is 'Auto'.
+ */
+
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
++
+ /* Get primary timing (CAS Latency and Cycle Time) */
+ if (pDCTstat->Speed == 0) {
+ mctGet_MaxLoadFreq(pDCTstat);
+@@ -1312,6 +2851,7 @@ static u8 AutoCycTiming_D(struct MCTStatStruc *pMCTstat,
+
+ /* Go get best T and CL as specified by DIMM mfgs. and OEM */
+ SPDGetTCL_D(pMCTstat, pDCTstat, dct);
++
+ /* skip callback mctForce800to1067_D */
+ pDCTstat->Speed = pDCTstat->DIMMAutoSpeed;
+ pDCTstat->CASL = pDCTstat->DIMMCASL;
+@@ -1344,7 +2884,10 @@ static void GetPresetmaxF_D(struct MCTStatStruc *pMCTstat,
+ u16 word;
+
+ /* Get CPU Si Revision defined limit (NPT) */
+- proposedFreq = 800; /* Rev F0 programmable max memclock is */
++ if (is_fam15h())
++ proposedFreq = 933;
++ else
++ proposedFreq = 800; /* Rev F0 programmable max memclock is */
+
+ /*Get User defined limit if "limit" mode */
+ if ( mctGet_NVbits(NV_MCTUSRTMGMODE) == 1) {
+@@ -1381,6 +2924,7 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
+ u16 tCKmin16x;
+ u16 tCKproposed16x;
+ u8 CLactual, CLdesired, CLT_Fail;
++ uint16_t min_frequency_tck16x;
+
+ u8 smbaddr, byte = 0, bytex = 0;
+
+@@ -1390,6 +2934,17 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
+ tCKmin16x = 0;
+ CLT_Fail = 0;
+
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
++
++ if (is_fam15h()) {
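++ /* min_frequency_tck16x is the longest supported clock period in 1/16 ns units,
++ * e.g. 16000 / 333MHz = 48 for the default DDR3-667 floor.
++ */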
++ uint16_t minimum_frequency_mhz = mctGet_NVbits(NV_MIN_MEMCLK);
++ if (minimum_frequency_mhz == 0)
++ minimum_frequency_mhz = 333;
++ min_frequency_tck16x = 16000 / minimum_frequency_mhz;
++ } else {
++ min_frequency_tck16x = 40;
++ }
++
+ for (i = 0; i < MAX_DIMMS_SUPPORTED; i++) {
+ if (pDCTstat->DIMMValid & (1 << i)) {
+ smbaddr = Get_DIMMAddress_D(pDCTstat, (dct + i));
+@@ -1419,27 +2974,44 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
+ tCKmin16x = byte * MTB16x;
+ }
+ }
+- /* calculate tCKproposed16x */
++ /* calculate tCKproposed16x (proposed clock period in ns * 16) */
+ tCKproposed16x = 16000 / pDCTstat->PresetmaxFreq;
+ if (tCKmin16x > tCKproposed16x)
+ tCKproposed16x = tCKmin16x;
+
+- /* mctHookTwo1333DimmOverride(); */
+- /* For UDIMM, if there are two DDR3-1333 on the same channel,
+- downgrade DDR speed to 1066. */
+-
+ /* TODO: get user manual tCK16x(Freq.) and overwrite current tCKproposed16x if manual. */
+- if (tCKproposed16x == 20)
+- pDCTstat->TargetFreq = 7;
+- else if (tCKproposed16x <= 24) {
+- pDCTstat->TargetFreq = 6;
+- tCKproposed16x = 24;
+- } else if (tCKproposed16x <= 30) {
+- pDCTstat->TargetFreq = 5;
+- tCKproposed16x = 30;
++ if (is_fam15h()) {
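++ /* Map the proposed clock period to the Fam15h MemClkFreq encoding, e.g.
++ * 16000 / 933MHz = 17 -> 0x16 (DDR3-1866) and 16000 / 800MHz = 20 -> 0x12
++ * (DDR3-1600); longer periods round up to the next slower supported speed bin.
++ */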
++ if (tCKproposed16x == 17)
++ pDCTstat->TargetFreq = 0x16;
++ else if (tCKproposed16x <= 20) {
++ pDCTstat->TargetFreq = 0x12;
++ tCKproposed16x = 20;
++ } else if (tCKproposed16x <= 24) {
++ pDCTstat->TargetFreq = 0xe;
++ tCKproposed16x = 24;
++ } else if (tCKproposed16x <= 30) {
++ pDCTstat->TargetFreq = 0xa;
++ tCKproposed16x = 30;
++ } else if (tCKproposed16x <= 40) {
++ pDCTstat->TargetFreq = 0x6;
++ tCKproposed16x = 40;
++ } else {
++ pDCTstat->TargetFreq = 0x4;
++ tCKproposed16x = 48;
++ }
+ } else {
+- pDCTstat->TargetFreq = 4;
+- tCKproposed16x = 40;
++ if (tCKproposed16x == 20)
++ pDCTstat->TargetFreq = 7;
++ else if (tCKproposed16x <= 24) {
++ pDCTstat->TargetFreq = 6;
++ tCKproposed16x = 24;
++ } else if (tCKproposed16x <= 30) {
++ pDCTstat->TargetFreq = 5;
++ tCKproposed16x = 30;
++ } else {
++ pDCTstat->TargetFreq = 4;
++ tCKproposed16x = 40;
++ }
+ }
+ /* Running through this loop twice:
+ - First time find tCL at target frequency
+@@ -1478,27 +3050,42 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
+ /* get CL and T */
+ if (!CLT_Fail) {
+ bytex = CLactual;
+- if (tCKproposed16x == 20)
+- byte = 7;
+- else if (tCKproposed16x == 24)
+- byte = 6;
+- else if (tCKproposed16x == 30)
+- byte = 5;
+- else
+- byte = 4;
++ if (is_fam15h()) {
++ if (tCKproposed16x == 17)
++ byte = 0x16;
++ else if (tCKproposed16x == 20)
++ byte = 0x12;
++ else if (tCKproposed16x == 24)
++ byte = 0xe;
++ else if (tCKproposed16x == 30)
++ byte = 0xa;
++ else if (tCKproposed16x == 40)
++ byte = 0x6;
++ else
++ byte = 0x4;
++ } else {
++ if (tCKproposed16x == 20)
++ byte = 7;
++ else if (tCKproposed16x == 24)
++ byte = 6;
++ else if (tCKproposed16x == 30)
++ byte = 5;
++ else
++ byte = 4;
++ }
+ } else {
+ /* mctHookManualCLOverride */
+ /* TODO: */
+ }
+
+- if (tCKproposed16x != 40) {
++ if (tCKproposed16x != min_frequency_tck16x) {
+ if (pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)) {
+ pDCTstat->DIMMAutoSpeed = byte;
+ pDCTstat->DIMMCASL = bytex;
+ break;
+ } else {
+ pDCTstat->TargetCASL = bytex;
+- tCKproposed16x = 40;
++ tCKproposed16x = min_frequency_tck16x;
+ }
+ } else {
+ pDCTstat->DIMMAutoSpeed = byte;
+@@ -1519,29 +3106,21 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
+ static u8 PlatformSpec_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+- u32 dev;
+- u32 reg;
+- u32 val;
++ if (!is_fam15h()) {
++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, dct);
+
+- mctGet_PS_Cfg_D(pMCTstat, pDCTstat, dct);
++ if (pDCTstat->GangedMode == 1) {
++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, 1);
++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, 1);
++ }
+
+- if (pDCTstat->GangedMode == 1) {
+- mctGet_PS_Cfg_D(pMCTstat, pDCTstat, 1);
+- mct_BeforePlatformSpec(pMCTstat, pDCTstat, 1);
+- }
++ set_2t_configuration(pMCTstat, pDCTstat, dct);
+
+- if ( pDCTstat->_2Tmode == 2) {
+- dev = pDCTstat->dev_dct;
+- reg = 0x94 + 0x100 * dct; /* Dram Configuration Hi */
+- val = Get_NB32(dev, reg);
+- val |= 1 << 20; /* 2T CMD mode */
+- Set_NB32(dev, reg, val);
++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, dct);
++ mct_PlatformSpec(pMCTstat, pDCTstat, dct);
++ if (pDCTstat->DIMMAutoSpeed == mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK)))
++ InitPhyCompensation(pMCTstat, pDCTstat, dct);
+ }
+-
+- mct_BeforePlatformSpec(pMCTstat, pDCTstat, dct);
+- mct_PlatformSpec(pMCTstat, pDCTstat, dct);
+- if (pDCTstat->DIMMAutoSpeed == 4)
+- InitPhyCompensation(pMCTstat, pDCTstat, dct);
+ mctHookAfterPSCfg();
+
+ return pDCTstat->ErrCode;
+@@ -1553,11 +3132,11 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ u32 DramControl, DramTimingLo, Status;
+ u32 DramConfigLo, DramConfigHi, DramConfigMisc, DramConfigMisc2;
+ u32 val;
+- u32 reg_off;
+ u32 dev;
+ u16 word;
+ u32 dword;
+ u8 byte;
++ uint32_t offset;
+
+ DramConfigLo = 0;
+ DramConfigHi = 0;
+@@ -1577,12 +3156,10 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ Status = pDCTstat->Status;
+
+ dev = pDCTstat->dev_dct;
+- reg_off = 0x100 * dct;
+-
+
+ /* Build Dram Control Register Value */
+- DramConfigMisc2 = Get_NB32 (dev, 0xA8 + reg_off); /* Dram Control*/
+- DramControl = Get_NB32 (dev, 0x78 + reg_off); /* Dram Control*/
++ DramConfigMisc2 = Get_NB32_DCT(dev, dct, 0xA8); /* Dram Control*/
++ DramControl = Get_NB32_DCT(dev, dct, 0x78); /* Dram Control*/
+
+ /* FIXME: Skip mct_checkForDxSupport */
+ /* REV_CALL mct_DoRdPtrInit if not Dx */
+@@ -1624,8 +3201,12 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ DramConfigLo = mct_DisDllShutdownSR(pMCTstat, pDCTstat, DramConfigLo, dct);
+
+ /* Build Dram Config Hi Register Value */
++ if (is_fam15h())
++ offset = 0x0;
++ else
++ offset = 0x1;
+ dword = pDCTstat->Speed;
+- DramConfigHi |= dword - 1; /* get MemClk encoding */
++ DramConfigHi |= dword - offset; /* get MemClk encoding */
+ DramConfigHi |= 1 << MemClkFreqVal;
+
+ if (Status & (1 << SB_Registered))
+@@ -1658,7 +3239,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ val = 0x0f; /* recommended setting (default) */
+ DramConfigHi |= val << 24;
+
+- if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx))
++ if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx | AMD_FAM15_ALL))
+ DramConfigHi |= 1 << DcqArbBypassEn;
+
+ /* Build MemClkDis Value from Dram Timing Lo and
+@@ -1669,7 +3250,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ NV_AllMemClks <>0 AND SB_DiagClks ==0 */
+
+ /* Dram Timing Low (owns Clock Enable bits) */
+- DramTimingLo = Get_NB32(dev, 0x88 + reg_off);
++ DramTimingLo = Get_NB32_DCT(dev, dct, 0x88);
+ if (mctGet_NVbits(NV_AllMemClks) == 0) {
+ /* Special Jedec SPD diagnostic bit - "enable all clocks" */
+ if (!(pDCTstat->Status & (1<<SB_DiagClks))) {
+@@ -1700,28 +3281,34 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ }
+ dword++ ;
+ }
++ DramTimingLo &= ~(0xff << 24);
+ DramTimingLo |= byte << 24;
+ }
+ }
+
+- printk(BIOS_DEBUG, "AutoConfig_D: DramControl: %x\n", DramControl);
+- printk(BIOS_DEBUG, "AutoConfig_D: DramTimingLo: %x\n", DramTimingLo);
+- printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc: %x\n", DramConfigMisc);
+- printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc2: %x\n", DramConfigMisc2);
+- printk(BIOS_DEBUG, "AutoConfig_D: DramConfigLo: %x\n", DramConfigLo);
+- printk(BIOS_DEBUG, "AutoConfig_D: DramConfigHi: %x\n", DramConfigHi);
++ printk(BIOS_DEBUG, "AutoConfig_D: DramControl: %08x\n", DramControl);
++ printk(BIOS_DEBUG, "AutoConfig_D: DramTimingLo: %08x\n", DramTimingLo);
++ printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc: %08x\n", DramConfigMisc);
++ printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc2: %08x\n", DramConfigMisc2);
++ printk(BIOS_DEBUG, "AutoConfig_D: DramConfigLo: %08x\n", DramConfigLo);
++ printk(BIOS_DEBUG, "AutoConfig_D: DramConfigHi: %08x\n", DramConfigHi);
+
+ /* Write Values to the registers */
+- Set_NB32(dev, 0x78 + reg_off, DramControl);
+- Set_NB32(dev, 0x88 + reg_off, DramTimingLo);
+- Set_NB32(dev, 0xA0 + reg_off, DramConfigMisc);
++ Set_NB32_DCT(dev, dct, 0x78, DramControl);
++ Set_NB32_DCT(dev, dct, 0x88, DramTimingLo);
++ Set_NB32_DCT(dev, dct, 0xa0, DramConfigMisc);
+ DramConfigMisc2 = mct_SetDramConfigMisc2(pDCTstat, dct, DramConfigMisc2);
+- Set_NB32(dev, 0xA8 + reg_off, DramConfigMisc2);
+- Set_NB32(dev, 0x90 + reg_off, DramConfigLo);
++ Set_NB32_DCT(dev, dct, 0xa8, DramConfigMisc2);
++ Set_NB32_DCT(dev, dct, 0x90, DramConfigLo);
+ ProgDramMRSReg_D(pMCTstat, pDCTstat, dct);
+- dword = Get_NB32(dev, 0x94 + reg_off);
++
++ if (is_fam15h())
++ InitDDRPhy(pMCTstat, pDCTstat, dct);
++
++ /* Write the DRAM Configuration High register, including memory frequency change */
++ dword = Get_NB32_DCT(dev, dct, 0x94);
+ DramConfigHi |= dword;
+- mct_SetDramConfigHi_D(pDCTstat, dct, DramConfigHi);
++ mct_SetDramConfigHi_D(pMCTstat, pDCTstat, dct, DramConfigHi);
+ mct_EarlyArbEn_D(pMCTstat, pDCTstat, dct);
+ mctHookAfterAutoCfg();
+
+@@ -1731,6 +3318,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ printk(BIOS_DEBUG, "AutoConfig: ErrStatus %x\n", pDCTstat->ErrStatus);
+ printk(BIOS_DEBUG, "AutoConfig: ErrCode %x\n", pDCTstat->ErrCode);
+ printk(BIOS_DEBUG, "AutoConfig: Done\n\n");
++
+ AutoConfig_exit:
+ return pDCTstat->ErrCode;
+ }
+@@ -1748,14 +3336,12 @@ static void SPDSetBanks_D(struct MCTStatStruc *pMCTstat,
+ u32 val;
+ u32 reg;
+ u32 dev;
+- u32 reg_off;
+ u8 byte;
+ u16 word;
+ u32 dword;
+ u16 smbaddr;
+
+ dev = pDCTstat->dev_dct;
+- reg_off = 0x100 * dct;
+
+ BankAddrReg = 0;
+ for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel+=2) {
+@@ -1820,10 +3406,10 @@ static void SPDSetBanks_D(struct MCTStatStruc *pMCTstat,
+ /*set ChipSelect population indicator odd bits*/
+ pDCTstat->CSPresent |= 1 << (ChipSel + 1);
+
+- reg = 0x60+(ChipSel<<1) + reg_off; /*Dram CS Mask Register */
++ reg = 0x60+(ChipSel<<1); /*Dram CS Mask Register */
+ val = csMask;
+ val &= 0x1FF83FE0; /* Mask out reserved bits.*/
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, dct, reg, val);
+ } else {
+ if (pDCTstat->DIMMSPDCSE & (1<<ChipSel))
+ pDCTstat->CSTestFail |= (1<<ChipSel);
+@@ -1847,8 +3433,8 @@ static void SPDSetBanks_D(struct MCTStatStruc *pMCTstat,
+ if (!pDCTstat->CSPresent)
+ pDCTstat->ErrCode = SC_StopError;
+
+- reg = 0x80 + reg_off; /* Bank Addressing Register */
+- Set_NB32(dev, reg, BankAddrReg);
++ reg = 0x80; /* Bank Addressing Register */
++ Set_NB32_DCT(dev, dct, reg, BankAddrReg);
+
+ pDCTstat->CSPresent_DCT[dct] = pDCTstat->CSPresent;
+ /* dump_pci_device(PCI_DEV(0, 0x18+pDCTstat->Node_ID, 2)); */
+@@ -1933,11 +3519,9 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat,
+ u16 word;
+ u32 dev;
+ u32 reg;
+- u32 reg_off;
+ u32 val;
+
+ dev = pDCTstat->dev_dct;
+- reg_off = 0x100 * dct;
+
+ _DSpareEn = 0;
+
+@@ -1974,11 +3558,11 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat,
+ BiggestBank = 0;
+ for (q = 0; q < MAX_CS_SUPPORTED; q++) { /* from DIMMS to CS */
+ if (pDCTstat->CSPresent & (1 << q)) { /* bank present? */
+- reg = 0x40 + (q << 2) + reg_off; /* Base[q] reg.*/
+- val = Get_NB32(dev, reg);
++ reg = 0x40 + (q << 2); /* Base[q] reg.*/
++ val = Get_NB32_DCT(dev, dct, reg);
+ if (!(val & 3)) { /* (CSEnable|Spare==1)bank is enabled already? */
+- reg = 0x60 + (q << 1) + reg_off; /*Mask[q] reg.*/
+- val = Get_NB32(dev, reg);
++ reg = 0x60 + (q << 1); /*Mask[q] reg.*/
++ val = Get_NB32_DCT(dev, dct, reg);
+ val >>= 19;
+ val++;
+ val <<= 19;
+@@ -1994,7 +3578,7 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat,
+ if (BiggestBank !=0) {
+ curcsBase = nxtcsBase; /* curcsBase=nxtcsBase*/
+ /* DRAM CS Base b Address Register offset */
+- reg = 0x40 + (b << 2) + reg_off;
++ reg = 0x40 + (b << 2);
+ if (_DSpareEn) {
+ BiggestBank = 0;
+ val = 1 << Spare; /* Spare Enable*/
+@@ -2013,7 +3597,7 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat,
+ }
+ }
+ }
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, dct, reg, val);
+ if (_DSpareEn)
+ _DSpareEn = 0;
+ else
+@@ -2024,9 +3608,9 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat,
+ /* bank present but disabled?*/
+ if ( pDCTstat->CSTestFail & (1 << p)) {
+ /* DRAM CS Base b Address Register offset */
+- reg = (p << 2) + 0x40 + reg_off;
++ reg = (p << 2) + 0x40;
+ val = 1 << TestFail;
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, dct, reg, val);
+ }
+ }
+
+@@ -2064,7 +3648,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
+ u16 i, j, k;
+ u8 smbaddr;
+ u8 SPDCtrl;
+- u16 RegDIMMPresent, MaxDimms;
++ u16 RegDIMMPresent, LRDIMMPresent, MaxDimms;
+ u8 devwidth;
+ u16 DimmSlots;
+ u8 byte = 0, bytex;
+@@ -2077,6 +3661,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
+ SPDCtrl = mctGet_NVbits(NV_SPDCHK_RESTRT);
+
+ RegDIMMPresent = 0;
++ LRDIMMPresent = 0;
+ pDCTstat->DimmQRPresent = 0;
+
+ for (i = 0; i < MAX_DIMMS_SUPPORTED; i++) {
+@@ -2115,6 +3700,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat->DimmManufacturerID[i] |= ((uint64_t)mctRead_SPD(smbaddr, SPD_MANID_START + k)) << (k * 8);
+ for (k = 0; k < SPD_PARTN_LENGTH; k++)
+ pDCTstat->DimmPartNumber[i][k] = mctRead_SPD(smbaddr, SPD_PARTN_START + k);
++ pDCTstat->DimmPartNumber[i][SPD_PARTN_LENGTH] = 0;
+ pDCTstat->DimmRevisionNumber[i] = 0;
+ for (k = 0; k < 2; k++)
+ pDCTstat->DimmRevisionNumber[i] |= ((uint16_t)mctRead_SPD(smbaddr, SPD_REVNO_START + k)) << (k * 8);
+@@ -2138,6 +3724,12 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
+ } else {
+ pDCTstat->DimmRegistered[i] = 0;
+ }
++ if (byte == JED_LRDIMM) {
++ LRDIMMPresent |= 1 << i;
++ pDCTstat->DimmLoadReduced[i] = 1;
++ } else {
++ pDCTstat->DimmLoadReduced[i] = 0;
++ }
+ /* Check ECC capable */
+ byte = mctRead_SPD(smbaddr, SPD_BusWidth);
+ if (byte & JED_ECC) {
+@@ -2221,6 +3813,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
+ printk(BIOS_DEBUG, "\t DIMMPresence: DIMMValid=%x\n", pDCTstat->DIMMValid);
+ printk(BIOS_DEBUG, "\t DIMMPresence: DIMMPresent=%x\n", pDCTstat->DIMMPresent);
+ printk(BIOS_DEBUG, "\t DIMMPresence: RegDIMMPresent=%x\n", RegDIMMPresent);
++ printk(BIOS_DEBUG, "\t DIMMPresence: LRDIMMPresent=%x\n", LRDIMMPresent);
+ printk(BIOS_DEBUG, "\t DIMMPresence: DimmECCPresent=%x\n", pDCTstat->DimmECCPresent);
+ printk(BIOS_DEBUG, "\t DIMMPresence: DimmPARPresent=%x\n", pDCTstat->DimmPARPresent);
+ printk(BIOS_DEBUG, "\t DIMMPresence: Dimmx4Present=%x\n", pDCTstat->Dimmx4Present);
+@@ -2247,6 +3840,16 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat->Status |= 1<<SB_Registered;
+ }
+ }
++ if (LRDIMMPresent != 0) {
++ if ((LRDIMMPresent ^ pDCTstat->DIMMValid) !=0) {
++ /* module type DIMM mismatch (reg'ed, unbuffered) */
++ pDCTstat->ErrStatus |= 1<<SB_DimmMismatchM;
++ pDCTstat->ErrCode = SC_StopError;
++ } else{
++			/* all DIMMs are load reduced */
++ pDCTstat->Status |= 1<<SB_LoadReduced;
++ }
++ }
+ if (pDCTstat->DimmECCPresent != 0) {
+ if ((pDCTstat->DimmECCPresent ^ pDCTstat->DIMMValid )== 0) {
+ /* all DIMMs are ECC capable */
+@@ -2284,6 +3887,26 @@ static u8 Get_DIMMAddress_D(struct DCTStatStruc *pDCTstat, u8 i)
+ return p[i];
+ }
+
++static void mct_preInitDCT(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat)
++{
++ u8 err_code;
++
++ /* Preconfigure DCT0 */
++ DCTPreInit_D(pMCTstat, pDCTstat, 0);
++
++ /* Configure DCT1 if unganged and enabled*/
++ if (!pDCTstat->GangedMode) {
++ if (pDCTstat->DIMMValidDCT[1] > 0) {
++ err_code = pDCTstat->ErrCode; /* save DCT0 errors */
++ pDCTstat->ErrCode = 0;
++ DCTPreInit_D(pMCTstat, pDCTstat, 1);
++ if (pDCTstat->ErrCode == 2) /* DCT1 is not Running */
++ pDCTstat->ErrCode = err_code; /* Using DCT0 Error code to update pDCTstat.ErrCode */
++ }
++ }
++}
++
+ static void mct_initDCT(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat)
+ {
+@@ -2295,7 +3918,7 @@ static void mct_initDCT(struct MCTStatStruc *pMCTstat,
+ if (pDCTstat->ErrCode == SC_FatalErr) {
+ /* Do nothing goto exitDCTInit; any fatal errors? */
+ } else {
+- /* Configure DCT1 if unganged and enabled*/
++ /* Configure DCT1 if unganged and enabled */
+ if (!pDCTstat->GangedMode) {
+ if (pDCTstat->DIMMValidDCT[1] > 0) {
+ err_code = pDCTstat->ErrCode; /* save DCT0 errors */
+@@ -2305,17 +3928,21 @@ static void mct_initDCT(struct MCTStatStruc *pMCTstat,
+ pDCTstat->ErrCode = err_code; /* Using DCT0 Error code to update pDCTstat.ErrCode */
+ } else {
+ val = 1 << DisDramInterface;
+- Set_NB32(pDCTstat->dev_dct, 0x100 + 0x94, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val);
++
++ /* To maximize power savings when DisDramInterface=1b,
++ * all of the MemClkDis bits should also be set.
++ */
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x88, 0xff000000);
+ }
+ }
+ }
+-/* exitDCTInit: */
+ }
+
+ static void mct_DramInit(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+- mct_BeforeDramInit_Prod_D(pMCTstat, pDCTstat);
++ mct_BeforeDramInit_Prod_D(pMCTstat, pDCTstat, dct);
+ mct_DramInit_Sw_D(pMCTstat, pDCTstat, dct);
+ /* mct_DramInit_Hw_D(pMCTstat, pDCTstat, dct); */
+ }
+@@ -2343,7 +3970,8 @@ static u8 mct_setMode(struct MCTStatStruc *pMCTstat,
+ if (byte)
+ pDCTstat->ErrStatus |= (1 << SB_DimmMismatchO); /* Set temp. to avoid setting of ganged mode */
+
+- if (!(pDCTstat->ErrStatus & (1 << SB_DimmMismatchO))) {
++ if ((!(pDCTstat->ErrStatus & (1 << SB_DimmMismatchO))) && (pDCTstat->LogicalCPUID & AMD_FAM10_ALL)) {
++ /* Ganged channel mode not supported on Family 15h or higher */
+ pDCTstat->GangedMode = 1;
+ /* valid 128-bit mode population. */
+ pDCTstat->Status |= 1 << SB_128bitmode;
+@@ -2387,10 +4015,8 @@ void Set_NB32_index(u32 dev, u32 index_reg, u32 index, u32 data)
+
+ u32 Get_NB32_index_wait(u32 dev, u32 index_reg, u32 index)
+ {
+-
+ u32 dword;
+
+-
+ index &= ~(1 << DctAccessWrite);
+ Set_NB32(dev, index_reg, index);
+ do {
+@@ -2405,7 +4031,6 @@ void Set_NB32_index_wait(u32 dev, u32 index_reg, u32 index, u32 data)
+ {
+ u32 dword;
+
+-
+ Set_NB32(dev, index_reg + 0x4, data);
+ index |= (1 << DctAccessWrite);
+ Set_NB32(dev, index_reg, index);
+@@ -2420,16 +4045,17 @@ static u8 mct_BeforePlatformSpec(struct MCTStatStruc *pMCTstat,
+ {
+ /* mct_checkForCxDxSupport_D */
+ if (pDCTstat->LogicalCPUID & AMD_DR_GT_Bx) {
++ /* Family 10h Errata 322: Address and Command Fine Delay Values May Be Incorrect */
+ /* 1. Write 00000000h to F2x[1,0]9C_xD08E000 */
+- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + dct * 0x100, 0x0D08E000, 0);
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, 0x98, 0x0D08E000, 0);
+ /* 2. If DRAM Configuration Register[MemClkFreq] (F2x[1,0]94[2:0]) is
+ greater than or equal to 011b (DDR-800 and higher),
+ then write 00000080h to F2x[1,0]9C_xD02E001,
+ else write 00000090h to F2x[1,0]9C_xD02E001. */
+- if (pDCTstat->Speed >= 4)
+- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + dct * 0x100, 0xD02E001, 0x80);
++ if (pDCTstat->Speed >= mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK)))
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, 0x98, 0x0D02E001, 0x80);
+ else
+- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + dct * 0x100, 0xD02E001, 0x90);
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, 0x98, 0x0D02E001, 0x90);
+ }
+ return pDCTstat->ErrCode;
+ }
+@@ -2455,9 +4081,9 @@ static u8 mct_PlatformSpec(struct MCTStatStruc *pMCTstat,
+ i_end = dct + 1;
+ }
+ for (i=i_start; i<i_end; i++) {
+- index_reg = 0x98 + (i * 0x100);
+- Set_NB32_index_wait(dev, index_reg, 0x00, pDCTstat->CH_ODC_CTL[i]); /* Channel A Output Driver Compensation Control */
+- Set_NB32_index_wait(dev, index_reg, 0x04, pDCTstat->CH_ADDR_TMG[i]); /* Channel A Output Driver Compensation Control */
++ index_reg = 0x98;
++ Set_NB32_index_wait_DCT(dev, i, index_reg, 0x00, pDCTstat->CH_ODC_CTL[i]); /* Channel A Output Driver Compensation Control */
++		Set_NB32_index_wait_DCT(dev, i, index_reg, 0x04, pDCTstat->CH_ADDR_TMG[i]); /* Channel A Address Timing Control */
+ }
+
+ return pDCTstat->ErrCode;
+@@ -2511,14 +4137,14 @@ static u8 mct_SPDCalcWidth(struct MCTStatStruc *pMCTstat,
+ }
+
+ if (pDCTstat->DIMMValidDCT[0] == 0) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94);
+ val |= 1 << DisDramInterface;
+- Set_NB32(pDCTstat->dev_dct, 0x94, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, val);
+ }
+ if (pDCTstat->DIMMValidDCT[1] == 0) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94);
+ val |= 1 << DisDramInterface;
+- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val);
+ }
+
+ printk(BIOS_DEBUG, "SPDCalcWidth: Status %x\n", pDCTstat->Status);
+@@ -2648,21 +4274,20 @@ static void Set_OtherTiming(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+ u32 reg;
+- u32 reg_off = 0x100 * dct;
+ u32 val;
+ u32 dword;
+ u32 dev = pDCTstat->dev_dct;
+
+- Get_DqsRcvEnGross_Diff(pDCTstat, dev, 0x98 + reg_off);
+- Get_WrDatGross_Diff(pDCTstat, dct, dev, 0x98 + reg_off);
++ Get_DqsRcvEnGross_Diff(pDCTstat, dev, dct, 0x98);
++ Get_WrDatGross_Diff(pDCTstat, dct, dev, 0x98);
+ Get_Trdrd(pMCTstat, pDCTstat, dct);
+ Get_Twrwr(pMCTstat, pDCTstat, dct);
+ Get_Twrrd(pMCTstat, pDCTstat, dct);
+ Get_TrwtTO(pMCTstat, pDCTstat, dct);
+ Get_TrwtWB(pMCTstat, pDCTstat);
+
+- reg = 0x8C + reg_off; /* Dram Timing Hi */
+- val = Get_NB32(dev, reg);
++ reg = 0x8C; /* Dram Timing Hi */
++ val = Get_NB32_DCT(dev, dct, reg);
+ val &= 0xffff0300;
+ dword = pDCTstat->TrwtTO;
+ val |= dword << 4;
+@@ -2674,10 +4299,10 @@ static void Set_OtherTiming(struct MCTStatStruc *pMCTstat,
+ val |= dword << 14;
+ dword = pDCTstat->TrwtWB;
+ val |= dword;
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, dct, reg, val);
+
+- reg = 0x78 + reg_off;
+- val = Get_NB32(dev, reg);
++ reg = 0x78;
++ val = Get_NB32_DCT(dev, dct, reg);
+ val &= 0xFFFFC0FF;
+ dword = pDCTstat->Twrrd >> 2;
+ val |= dword << 8;
+@@ -2685,7 +4310,7 @@ static void Set_OtherTiming(struct MCTStatStruc *pMCTstat,
+ val |= dword << 10;
+ dword = pDCTstat->Trdrd >> 2;
+ val |= dword << 12;
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, dct, reg, val);
+ }
+
+ static void Get_Trdrd(struct MCTStatStruc *pMCTstat,
+@@ -2755,18 +4380,17 @@ static void Get_TrwtWB(struct MCTStatStruc *pMCTstat,
+ static u8 Get_Latency_Diff(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+ u32 val1, val2;
+
+- val1 = Get_NB32(dev, reg_off + 0x88) & 0xF;
+- val2 = (Get_NB32(dev, reg_off + 0x84) >> 20) & 7;
++ val1 = Get_NB32_DCT(dev, dct, 0x88) & 0xF;
++ val2 = (Get_NB32_DCT(dev, dct, 0x84) >> 20) & 7;
+
+ return val1 - val2;
+ }
+
+ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat,
+- u32 dev, u32 index_reg)
++ u32 dev, uint8_t dct, u32 index_reg)
+ {
+ u8 Smallest, Largest;
+ u32 val;
+@@ -2776,12 +4400,12 @@ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat,
+ DqsRcvEnGrossDelay of any other DIMM is equal to the Critical
+ Gross Delay Difference (CGDD) */
+ /* DqsRcvEn byte 1,0 */
+- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x10);
++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x10);
+ Largest = val & 0xFF;
+ Smallest = (val >> 8) & 0xFF;
+
+ /* DqsRcvEn byte 3,2 */
+- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x11);
++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x11);
+ byte = val & 0xFF;
+ bytex = (val >> 8) & 0xFF;
+ if (bytex < Smallest)
+@@ -2790,7 +4414,7 @@ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat,
+ Largest = byte;
+
+ /* DqsRcvEn byte 5,4 */
+- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x20);
++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x20);
+ byte = val & 0xFF;
+ bytex = (val >> 8) & 0xFF;
+ if (bytex < Smallest)
+@@ -2799,7 +4423,7 @@ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat,
+ Largest = byte;
+
+ /* DqsRcvEn byte 7,6 */
+- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x21);
++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x21);
+ byte = val & 0xFF;
+ bytex = (val >> 8) & 0xFF;
+ if (bytex < Smallest)
+@@ -2809,7 +4433,7 @@ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat,
+
+ if (pDCTstat->DimmECCPresent> 0) {
+ /*DqsRcvEn Ecc */
+- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x12);
++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x12);
+ byte = val & 0xFF;
+ bytex = (val >> 8) & 0xFF;
+ if (bytex < Smallest)
+@@ -2873,7 +4497,7 @@ static void Get_WrDatGross_Diff(struct DCTStatStruc *pDCTstat,
+ }
+
+ static u16 Get_DqsRcvEnGross_MaxMin(struct DCTStatStruc *pDCTstat,
+- u32 dev, u32 index_reg,
++ u32 dev, uint8_t dct, u32 index_reg,
+ u32 index)
+ {
+ u8 Smallest, Largest;
+@@ -2891,7 +4515,7 @@ static u16 Get_DqsRcvEnGross_MaxMin(struct DCTStatStruc *pDCTstat,
+
+ for (i=0; i < 8; i+=2) {
+ if ( pDCTstat->DIMMValid & (1 << i)) {
+- val = Get_NB32_index_wait(dev, index_reg, index);
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
+ val &= 0x00E000E0;
+ byte = (val >> 5) & 0xFF;
+ if (byte < Smallest)
+@@ -2929,7 +4553,7 @@ static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat,
+ Smallest = 3;
+ Largest = 0;
+ for (i=0; i < 2; i++) {
+- val = Get_NB32_index_wait(dev, index_reg, index);
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
+ val &= 0x60606060;
+ val >>= 5;
+ for (j=0; j < 4; j++) {
+@@ -2945,7 +4569,7 @@ static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat,
+
+ if (pDCTstat->DimmECCPresent > 0) {
+ index++;
+- val = Get_NB32_index_wait(dev, index_reg, index);
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
+ val &= 0x00000060;
+ val >>= 5;
+ byte = val & 0xFF;
+@@ -2965,25 +4589,30 @@ static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat,
+ static void mct_PhyController_Config(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+- u32 index_reg = 0x98 + 0x100 * dct;
++ uint8_t index;
++ uint32_t dword;
++ u32 index_reg = 0x98;
+ u32 dev = pDCTstat->dev_dct;
+- u32 val;
+
+- if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3 | AMD_RB_C3)) {
++ if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3 | AMD_RB_C3 | AMD_FAM15_ALL)) {
+ if (pDCTstat->Dimmx4Present == 0) {
+- /* Set bit7 RxDqsUDllPowerDown to register F2x[1, 0]98_x0D0F0F13 for power saving */
+- val = Get_NB32_index_wait(dev, index_reg, 0x0D0F0F13); /* Agesa v3 v6 might be wrong here. */
+- val |= 1 << 7; /* BIOS should set this bit when x4 DIMMs are not present */
+- Set_NB32_index_wait(dev, index_reg, 0x0D0F0F13, val);
++ /* Set bit7 RxDqsUDllPowerDown to register F2x[1, 0]98_x0D0F0F13 for
++ * additional power saving when x4 DIMMs are not present.
++ */
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0013 | (index << 8));
++ dword |= (0x1 << 7); /* RxDqsUDllPowerDown = 1 */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0013 | (index << 8), dword);
++ }
+ }
+ }
+
+- if (pDCTstat->LogicalCPUID & AMD_DR_DAC2_OR_C3) {
++ if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3 | AMD_FAM15_ALL)) {
+ if (pDCTstat->DimmECCPresent == 0) {
+ /* Set bit4 PwrDn to register F2x[1, 0]98_x0D0F0830 for power saving */
+- val = Get_NB32_index_wait(dev, index_reg, 0x0D0F0830);
+- val |= 1 << 4; /* BIOS should set this bit if ECC DIMMs are not present */
+- Set_NB32_index_wait(dev, index_reg, 0x0D0F0830, val);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0830);
++ dword |= 1 << 4; /* BIOS should set this bit if ECC DIMMs are not present */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0830, dword);
+ }
+ }
+
+@@ -3024,21 +4653,61 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat,
+ val &= ~(1 << 12);
+
+ val &= 0x0FFFFFFF;
+- switch (pDCTstat->Speed) {
+- case 4:
+- val |= 0x50000000; /* 5 for DDR800 */
+- break;
+- case 5:
+- val |= 0x60000000; /* 6 for DDR1066 */
+- break;
+- case 6:
+- val |= 0x80000000; /* 8 for DDR800 */
+- break;
+- default:
+- val |= 0x90000000; /* 9 for DDR1600 */
+- break;
++ if (!is_fam15h()) {
++ switch (pDCTstat->Speed) {
++ case 4:
++ val |= 0x50000000; /* 5 for DDR800 */
++ break;
++ case 5:
++ val |= 0x60000000; /* 6 for DDR1066 */
++ break;
++ case 6:
++			val |= 0x80000000;	/* 8 for DDR1333 */
++ break;
++ default:
++ val |= 0x90000000; /* 9 for DDR1600 */
++ break;
++ }
+ }
+ Set_NB32(pDCTstat->dev_dct, 0x1B0, val);
++
++ if (is_fam15h()) {
++ uint8_t wm1;
++ uint8_t wm2;
++
++ switch (pDCTstat->Speed) {
++ case 0x4:
++ wm1 = 0x3;
++ wm2 = 0x4;
++ break;
++ case 0x6:
++ wm1 = 0x3;
++ wm2 = 0x5;
++ break;
++ case 0xa:
++ wm1 = 0x4;
++ wm2 = 0x6;
++ break;
++ case 0xe:
++ wm1 = 0x5;
++ wm2 = 0x8;
++ break;
++ case 0x12:
++ wm1 = 0x6;
++ wm2 = 0x9;
++ break;
++ default:
++ wm1 = 0x7;
++ wm2 = 0xa;
++ break;
++ }
++
++ val = Get_NB32(pDCTstat->dev_dct, 0x1B4);
++ val &= ~(0x3ff);
++ val |= ((wm2 & 0x1f) << 5);
++ val |= (wm1 & 0x1f);
++ Set_NB32(pDCTstat->dev_dct, 0x1B4, val);
++ }
+ }
+ }
+
+@@ -3055,16 +4724,103 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat,
+ }
+ }
+
++void mct_ForceNBPState0_En_Fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat)
++{
++ /* Force the NB P-state to P0 */
++ uint32_t dword;
++ uint32_t dword2;
++
++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x174);
++ if (!(dword & 0x1)) {
++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x170);
++ pDCTstat->SwNbPstateLoDis = (dword >> 14) & 0x1;
++ pDCTstat->NbPstateDisOnP0 = (dword >> 13) & 0x1;
++ pDCTstat->NbPstateThreshold = (dword >> 9) & 0x7;
++ pDCTstat->NbPstateHi = (dword >> 6) & 0x3;
++ dword &= ~(0x1 << 14); /* SwNbPstateLoDis = 0 */
++ dword &= ~(0x1 << 13); /* NbPstateDisOnP0 = 0 */
++ dword &= ~(0x7 << 9); /* NbPstateThreshold = 0 */
++ dword &= ~(0x3 << 3); /* NbPstateLo = NbPstateMaxVal */
++ dword |= ((dword & 0x3) << 3);
++ Set_NB32(pDCTstat->dev_nbctl, 0x170, dword);
++
++ /* Wait until CurNbPState == NbPstateLo */
++ do {
++ dword2 = Get_NB32(pDCTstat->dev_nbctl, 0x174);
++		} while (((dword2 >> 19) & 0x7) != (dword & 0x3));
++
++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x170);
++ dword &= ~(0x3 << 6); /* NbPstateHi = 0 */
++ dword |= (0x3 << 14); /* SwNbPstateLoDis = 1 */
++ Set_NB32(pDCTstat->dev_nbctl, 0x170, dword);
++
++ /* Wait until CurNbPState == 0 */
++ do {
++ dword2 = Get_NB32(pDCTstat->dev_nbctl, 0x174);
++		} while (((dword2 >> 19) & 0x7) != 0);
++ }
++}
++
++void mct_ForceNBPState0_Dis_Fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat)
++{
++	/* Restore normal NB P-state functionality */
++ uint32_t dword;
++
++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x174);
++ if (!(dword & 0x1)) {
++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x170);
++ dword &= ~(0x1 << 14); /* SwNbPstateLoDis*/
++ dword |= ((pDCTstat->SwNbPstateLoDis & 0x1) << 14);
++ dword &= ~(0x1 << 13); /* NbPstateDisOnP0 */
++ dword |= ((pDCTstat->NbPstateDisOnP0 & 0x1) << 13);
++ dword &= ~(0x7 << 9); /* NbPstateThreshold */
++ dword |= ((pDCTstat->NbPstateThreshold & 0x7) << 9);
++ dword &= ~(0x3 << 6); /* NbPstateHi */
++		dword |= ((pDCTstat->NbPstateHi & 0x3) << 6);
++ Set_NB32(pDCTstat->dev_nbctl, 0x170, dword);
++ }
++}
++
+ static void mct_InitialMCT_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat)
+ {
+- mct_SetClToNB_D(pMCTstat, pDCTstat);
+- mct_SetWbEnhWsbDis_D(pMCTstat, pDCTstat);
++ if (is_fam15h()) {
++ msr_t p0_state_msr;
++ uint8_t cpu_fid;
++ uint8_t cpu_did;
++ uint32_t cpu_divisor;
++ uint8_t boost_states;
++
++ /* Retrieve the number of boost states */
++ boost_states = (Get_NB32(pDCTstat->dev_link, 0x15c) >> 2) & 0x7;
++
++ /* Retrieve and store the TSC frequency (P0 COF) */
++ p0_state_msr = rdmsr(0xc0010064 + boost_states);
++ cpu_fid = p0_state_msr.lo & 0x3f;
++ cpu_did = (p0_state_msr.lo >> 6) & 0x7;
++ cpu_divisor = (0x1 << cpu_did);
++ pMCTstat->TSCFreq = (100 * (cpu_fid + 0x10)) / cpu_divisor;
++
++ mct_ForceNBPState0_En_Fam15(pMCTstat, pDCTstat);
++ } else {
++ /* K10 BKDG v3.62 section 2.8.9.2 */
++ printk(BIOS_DEBUG, "mct_InitialMCT_D: clear_legacy_Mode\n");
++ clear_legacy_Mode(pMCTstat, pDCTstat);
++
++ /* Northbridge configuration */
++ mct_SetClToNB_D(pMCTstat, pDCTstat);
++ mct_SetWbEnhWsbDis_D(pMCTstat, pDCTstat);
++ }
+ }
+
+ static u32 mct_NodePresent_D(void)
+ {
+ u32 val;
+- val = 0x12001022;
++ if (is_fam15h())
++ val = 0x16001022;
++ else
++ val = 0x12001022;
+ return val;
+ }
+
+@@ -3097,14 +4853,13 @@ static void clear_legacy_Mode(struct MCTStatStruc *pMCTstat,
+
+ /* Clear Legacy BIOS Mode bit */
+ reg = 0x94;
+- val = Get_NB32(dev, reg);
++ val = Get_NB32_DCT(dev, 0, reg);
+ val &= ~(1<<LegacyBiosMode);
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, 0, reg, val);
+
+- reg = 0x94 + 0x100;
+- val = Get_NB32(dev, reg);
++ val = Get_NB32_DCT(dev, 1, reg);
+ val &= ~(1<<LegacyBiosMode);
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, 1, reg, val);
+ }
+
+ static void mct_HTMemMapExt(struct MCTStatStruc *pMCTstat,
+@@ -3171,7 +4926,7 @@ static void SetCSTriState(struct MCTStatStruc *pMCTstat,
+ {
+ u32 val;
+ u32 dev = pDCTstat->dev_dct;
+- u32 index_reg = 0x98 + 0x100 * dct;
++ u32 index_reg = 0x98;
+ u32 index;
+ u16 word;
+
+@@ -3186,9 +4941,9 @@ static void SetCSTriState(struct MCTStatStruc *pMCTstat,
+ }
+ word = (~word) & 0xFF;
+ index = 0x0c;
+- val = Get_NB32_index_wait(dev, index_reg, index);
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
+ val |= word;
+- Set_NB32_index_wait(dev, index_reg, index, val);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val);
+ }
+
+ static void SetCKETriState(struct MCTStatStruc *pMCTstat,
+@@ -3196,7 +4951,7 @@ static void SetCKETriState(struct MCTStatStruc *pMCTstat,
+ {
+ u32 val;
+ u32 dev;
+- u32 index_reg = 0x98 + 0x100 * dct;
++ u32 index_reg = 0x98;
+ u32 index;
+ u16 word;
+
+@@ -3208,14 +4963,14 @@ static void SetCKETriState(struct MCTStatStruc *pMCTstat,
+ word = pDCTstat->CSPresent;
+
+ index = 0x0c;
+- val = Get_NB32_index_wait(dev, index_reg, index);
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
+ if ((word & 0x55) == 0)
+ val |= 1 << 12;
+
+ if ((word & 0xAA) == 0)
+ val |= 1 << 13;
+
+- Set_NB32_index_wait(dev, index_reg, index, val);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val);
+ }
+
+ static void SetODTTriState(struct MCTStatStruc *pMCTstat,
+@@ -3223,7 +4978,7 @@ static void SetODTTriState(struct MCTStatStruc *pMCTstat,
+ {
+ u32 val;
+ u32 dev;
+- u32 index_reg = 0x98 + 0x100 * dct;
++ u32 index_reg = 0x98;
+ u8 cs;
+ u32 index;
+ u8 odt;
+@@ -3257,86 +5012,281 @@ static void SetODTTriState(struct MCTStatStruc *pMCTstat,
+ }
+
+ index = 0x0C;
+- val = Get_NB32_index_wait(dev, index_reg, index);
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
+ val |= ((odt & 0xFF) << 8); /* set bits 11:8 ODTTriState[3:0] */
+- Set_NB32_index_wait(dev, index_reg, index, val);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val);
++
++}
++
++/* Family 15h */
++static void InitDDRPhy(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 dct)
++{
++ uint8_t index;
++ uint32_t dword;
++ uint8_t ddr_voltage_index;
++ uint8_t amd_voltage_level_index = 0;
++ uint32_t index_reg = 0x98;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
+
++ /* Find current DDR supply voltage for this DCT */
++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct);
++
++ /* Fam15h BKDG v3.14 section 2.10.5.3
++ * The remainder of the Phy Initialization algorithm picks up in phyAssistedMemFnceTraining
++ */
++ for (dct = 0; dct < 2; dct++) {
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000b, 0x80000000);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe013, 0x00000118);
++
++ /* Program desired VDDIO level */
++ if (ddr_voltage_index & 0x4) {
++ /* 1.25V */
++ amd_voltage_level_index = 0x2;
++ } else if (ddr_voltage_index & 0x2) {
++ /* 1.35V */
++ amd_voltage_level_index = 0x1;
++ } else if (ddr_voltage_index & 0x1) {
++ /* 1.50V */
++ amd_voltage_level_index = 0x0;
++ }
++
++ /* D18F2x9C_x0D0F_0[F,8:0]1F_dct[1:0][RxVioLvl] */
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8));
++ dword &= ~(0x3 << 3);
++ dword |= (amd_voltage_level_index << 3);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8), dword);
++ }
++
++ /* D18F2x9C_x0D0F_[C,8,2][2:0]1F_dct[1:0][RxVioLvl] */
++ for (index = 0; index < 0x3; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8));
++ dword &= ~(0x3 << 3);
++ dword |= (amd_voltage_level_index << 3);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8), dword);
++ }
++ for (index = 0; index < 0x2; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8));
++ dword &= ~(0x3 << 3);
++ dword |= (amd_voltage_level_index << 3);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8), dword);
++ }
++ for (index = 0; index < 0x1; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8));
++ dword &= ~(0x3 << 3);
++ dword |= (amd_voltage_level_index << 3);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8), dword);
++ }
++
++ /* D18F2x9C_x0D0F_4009_dct[1:0][CmpVioLvl, ComparatorAdjust] */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f4009);
++ dword &= ~(0x0000c00c);
++ dword |= (amd_voltage_level_index << 14);
++ dword |= (amd_voltage_level_index << 2);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f4009, dword);
++ }
++
++ printk(BIOS_DEBUG, "%s: Done\n", __func__);
+ }
+
+ static void InitPhyCompensation(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+ u8 i;
+- u32 index_reg = 0x98 + 0x100 * dct;
++ u32 index_reg = 0x98;
+ u32 dev = pDCTstat->dev_dct;
+- u32 val;
+ u32 valx = 0;
+- u32 dword;
++ uint8_t index;
++ uint32_t dword;
+ const u8 *p;
+
+- val = Get_NB32_index_wait(dev, index_reg, 0x00);
+- dword = 0;
+- for (i=0; i < 6; i++) {
+- switch (i) {
+- case 0:
+- case 4:
+- p = Table_Comp_Rise_Slew_15x;
+- valx = p[(val >> 16) & 3];
+- break;
+- case 1:
+- case 5:
+- p = Table_Comp_Fall_Slew_15x;
+- valx = p[(val >> 16) & 3];
+- break;
+- case 2:
+- p = Table_Comp_Rise_Slew_20x;
+- valx = p[(val >> 8) & 3];
+- break;
+- case 3:
+- p = Table_Comp_Fall_Slew_20x;
+- valx = p[(val >> 8) & 3];
+- break;
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
++
++ if (is_fam15h()) {
++ /* Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 */
++ uint32_t tx_pre;
++ uint32_t drive_strength;
++
++ /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp, DisablePredriverCal] */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003);
++ dword |= (0x3 << 13);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003, dword);
++
++ /* Determine TxPreP/TxPreN for data lanes (Stage 1) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000);
++ drive_strength = (dword >> 20) & 0x7; /* DqsDrvStren */
++ tx_pre = fam15h_phy_predriver_calibration_code(pDCTstat, dct, drive_strength);
++
++ /* Program TxPreP/TxPreN for data lanes (Stage 1) */
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0006 | (index << 8));
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0006 | (index << 8), dword);
++ }
+
++ /* Determine TxPreP/TxPreN for data lanes (Stage 2) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000);
++ drive_strength = (dword >> 16) & 0x7; /* DataDrvStren */
++ tx_pre = fam15h_phy_predriver_calibration_code(pDCTstat, dct, drive_strength);
++
++ /* Program TxPreP/TxPreN for data lanes (Stage 2) */
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000a | (index << 8));
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000a | (index << 8), dword);
++ }
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0002 | (index << 8));
++ dword &= ~(0xfff);
++ dword |= (0x8000 | tx_pre);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0002 | (index << 8), dword);
+ }
+- dword |= valx << (5 * i);
+- }
+
+- /* Override/Exception */
+- if (!pDCTstat->GangedMode) {
+- i = 0; /* use i for the dct setting required */
+- if (pDCTstat->MAdimms[0] < 4)
+- i = 1;
+- if (((pDCTstat->Speed == 2) || (pDCTstat->Speed == 3)) && (pDCTstat->MAdimms[i] == 4)) {
+- dword &= 0xF18FFF18;
+- index_reg = 0x98; /* force dct = 0 */
++ /* Determine TxPreP/TxPreN for command/address lines (Stage 1) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000);
++ drive_strength = (dword >> 4) & 0x7; /* CsOdtDrvStren */
++ tx_pre = fam15h_phy_predriver_cmd_addr_calibration_code(pDCTstat, dct, drive_strength);
++
++ /* Program TxPreP/TxPreN for command/address lines (Stage 1) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8006);
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8006, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f800a);
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f800a, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8002);
++ dword &= ~(0xfff);
++ dword |= (0x8000 | tx_pre);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8002, dword);
++
++ /* Determine TxPreP/TxPreN for command/address lines (Stage 2) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000);
++ drive_strength = (dword >> 8) & 0x7; /* AddrCmdDrvStren */
++ tx_pre = fam15h_phy_predriver_cmd_addr_calibration_code(pDCTstat, dct, drive_strength);
++
++ /* Program TxPreP/TxPreN for command/address lines (Stage 2) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8106);
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8106, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f810a);
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f810a, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc006);
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc006, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc00a);
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc00a, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc00e);
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc00e, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc012);
++ dword &= ~(0xfff);
++ dword |= tx_pre;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc012, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8102);
++ dword &= ~(0xfff);
++ dword |= (0x8000 | tx_pre);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8102, dword);
++
++ /* Determine TxPreP/TxPreN for command/address lines (Stage 3) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000);
++ drive_strength = (dword >> 0) & 0x7; /* CkeDrvStren */
++ tx_pre = fam15h_phy_predriver_cmd_addr_calibration_code(pDCTstat, dct, drive_strength);
++
++ /* Program TxPreP/TxPreN for command/address lines (Stage 3) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc002);
++ dword &= ~(0xfff);
++ dword |= (0x8000 | tx_pre);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc002, dword);
++
++ /* Determine TxPreP/TxPreN for clock lines */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000);
++ drive_strength = (dword >> 12) & 0x7; /* ClkDrvStren */
++ tx_pre = fam15h_phy_predriver_clk_calibration_code(pDCTstat, dct, drive_strength);
++
++ /* Program TxPreP/TxPreN for clock lines */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2002);
++ dword &= ~(0xfff);
++ dword |= (0x8000 | tx_pre);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2002, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2102);
++ dword &= ~(0xfff);
++ dword |= (0x8000 | tx_pre);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2102, dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2202);
++ dword &= ~(0xfff);
++ dword |= (0x8000 | tx_pre);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2202, dword);
++ } else {
++		dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00);
++		valx = 0;
++		for (i=0; i < 6; i++) {
++			switch (i) {
++			case 0:
++			case 4:
++				p = Table_Comp_Rise_Slew_15x;
++				valx |= p[(dword >> 16) & 3] << (5 * i);
++				break;
++			case 1:
++			case 5:
++				p = Table_Comp_Fall_Slew_15x;
++				valx |= p[(dword >> 16) & 3] << (5 * i);
++				break;
++			case 2:
++				p = Table_Comp_Rise_Slew_20x;
++				valx |= p[(dword >> 8) & 3] << (5 * i);
++				break;
++			case 3:
++				p = Table_Comp_Fall_Slew_20x;
++				valx |= p[(dword >> 8) & 3] << (5 * i);
++				break;
++			}
++		}
++
++		/* Write the accumulated slew rate settings */
++		Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0a, valx);
+ }
+
+- Set_NB32_index_wait(dev, index_reg, 0x0a, dword);
++ printk(BIOS_DEBUG, "%s: Done\n", __func__);
+ }
+
+ static void mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+- u32 reg;
+- u32 val;
+- u32 dev = pDCTstat->dev_dct;
+-
+- /* GhEnhancement #18429 modified by askar: For low NB CLK :
+- * Memclk ratio, the DCT may need to arbitrate early to avoid
+- * unnecessary bubbles.
+- * bit 19 of F2x[1,0]78 Dram Control Register, set this bit only when
+- * NB CLK : Memclk ratio is between 3:1 (inclusive) to 4:5 (inclusive)
+- */
+- reg = 0x78 + 0x100 * dct;
+- val = Get_NB32(dev, reg);
+-
+- if (pDCTstat->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx))
+- val |= (1 << EarlyArbEn);
+- else if (CheckNBCOFEarlyArbEn(pMCTstat, pDCTstat))
+- val |= (1 << EarlyArbEn);
+-
+- Set_NB32(dev, reg, val);
++ if (!is_fam15h()) {
++ u32 reg;
++ u32 val;
++ u32 dev = pDCTstat->dev_dct;
++
++ /* GhEnhancement #18429 modified by askar: For low NB CLK :
++ * Memclk ratio, the DCT may need to arbitrate early to avoid
++ * unnecessary bubbles.
++ * bit 19 of F2x[1,0]78 Dram Control Register, set this bit only when
++ * NB CLK : Memclk ratio is between 3:1 (inclusive) to 4:5 (inclusive)
++ */
++ reg = 0x78;
++ val = Get_NB32_DCT(dev, dct, reg);
++
++ if (pDCTstat->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx))
++ val |= (1 << EarlyArbEn);
++ else if (CheckNBCOFEarlyArbEn(pMCTstat, pDCTstat))
++ val |= (1 << EarlyArbEn);
++
++ Set_NB32_DCT(dev, dct, reg, val);
++ }
+ }
+
+ static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat,
+@@ -3359,9 +5309,9 @@ static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat,
+ NbDid |= 1;
+
+ reg = 0x94;
+- val = Get_NB32(dev, reg);
++ val = Get_NB32_DCT(dev, 0, reg);
+ if (!(val & (1 << MemClkFreqVal)))
+- val = Get_NB32(dev, reg + 0x100); /* get the DCT1 value */
++ val = Get_NB32_DCT(dev, 1, reg); /* get the DCT1 value */
+
+ val &= 0x07;
+ val += 3;
+@@ -3430,28 +5380,204 @@ static void mct_ResetDataStruct_D(struct MCTStatStruc *pMCTstat,
+ }
+
+ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat)
++ struct DCTStatStruc *pDCTstat, u8 dct)
++{
++ mct_ProgramODT_D(pMCTstat, pDCTstat, dct);
++}
++
++static void mct_ProgramODT_D(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+ u8 i;
+- u32 reg_off, dword;
++ u32 dword;
+ u32 dev = pDCTstat->dev_dct;
+
+- if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ if (is_fam15h()) {
++ /* Obtain number of DIMMs on channel */
++ uint8_t dimm_count = pDCTstat->MAdimms[dct];
++ uint8_t rank_count_dimm0;
++ uint8_t rank_count_dimm1;
++ uint32_t odt_pattern_0;
++ uint32_t odt_pattern_1;
++ uint32_t odt_pattern_2;
++ uint32_t odt_pattern_3;
++ uint8_t write_odt_duration;
++ uint8_t read_odt_duration;
++ uint8_t write_odt_delay;
++ uint8_t read_odt_delay;
++
++ /* Select appropriate ODT pattern for installed DIMMs
++ * Refer to the Fam15h BKDG Rev. 3.14, page 149 onwards
++ */
++ if (pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_REGISTERED]) {
++ if (MaxDimmsInstallable == 2) {
++ if (dimm_count == 1) {
++ /* 1 DIMM detected */
++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++ if (rank_count_dimm1 == 1) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00020000;
++ } else if (rank_count_dimm1 == 2) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x08020000;
++ } else if (rank_count_dimm1 == 4) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x020a0000;
++ odt_pattern_3 = 0x080a0000;
++ } else {
++ /* Fallback */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x08020000;
++ }
++ } else {
++ /* 2 DIMMs detected */
++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[0];
++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++ if ((rank_count_dimm0 < 4) && (rank_count_dimm1 < 4)) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x01010202;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x09030603;
++ } else if ((rank_count_dimm0 < 4) && (rank_count_dimm1 == 4)) {
++ odt_pattern_0 = 0x01010000;
++ odt_pattern_1 = 0x01010a0a;
++ odt_pattern_2 = 0x01090000;
++ odt_pattern_3 = 0x01030e0b;
++ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 < 4)) {
++ odt_pattern_0 = 0x00000202;
++ odt_pattern_1 = 0x05050202;
++ odt_pattern_2 = 0x00000206;
++ odt_pattern_3 = 0x0d070203;
++ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 == 4)) {
++ odt_pattern_0 = 0x05050a0a;
++ odt_pattern_1 = 0x05050a0a;
++ odt_pattern_2 = 0x050d0a0e;
++ odt_pattern_3 = 0x05070a0b;
++ } else {
++ /* Fallback */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00000000;
++ }
++ }
++ } else {
++ /* FIXME
++ * 3 DIMMs per channel UNIMPLEMENTED
++ */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00000000;
++ }
++ } else if (pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_LOAD_REDUCED]) {
++ /* TODO
++ * Load reduced dimms UNIMPLEMENTED
++ */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00000000;
++ } else {
++ if (MaxDimmsInstallable == 2) {
++ if (dimm_count == 1) {
++ /* 1 DIMM detected */
++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1];
++ if (rank_count_dimm1 == 1) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00020000;
++ } else if (rank_count_dimm1 == 2) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x08020000;
++ } else {
++ /* Fallback */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x08020000;
++ }
++ } else {
++ /* 2 DIMMs detected */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x01010202;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x09030603;
++ }
++ } else {
++ /* FIXME
++ * 3 DIMMs per channel UNIMPLEMENTED
++ */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00000000;
++ }
++ }
++
++ if (pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_LOAD_REDUCED]) {
++ /* TODO
++ * Load reduced dimms UNIMPLEMENTED
++ */
++ write_odt_duration = 0x0;
++ read_odt_duration = 0x0;
++ write_odt_delay = 0x0;
++ read_odt_delay = 0x0;
++ } else {
++ uint8_t tcl;
++ uint8_t tcwl;
++ tcl = Get_NB32_DCT(dev, dct, 0x200) & 0x1f;
++ tcwl = Get_NB32_DCT(dev, dct, 0x20c) & 0x1f;
++
++ write_odt_duration = 0x6;
++ read_odt_duration = 0x6;
++ write_odt_delay = 0x0;
++ if (tcl > tcwl)
++ read_odt_delay = tcl - tcwl;
++ else
++ read_odt_delay = 0x0;
++ }
++
++ /* Program ODT pattern */
++ Set_NB32_DCT(dev, dct, 0x230, odt_pattern_1);
++ Set_NB32_DCT(dev, dct, 0x234, odt_pattern_0);
++ Set_NB32_DCT(dev, dct, 0x238, odt_pattern_3);
++ Set_NB32_DCT(dev, dct, 0x23c, odt_pattern_2);
++ dword = Get_NB32_DCT(dev, dct, 0x240);
++ dword &= ~(0x7 << 12); /* WrOdtOnDuration = write_odt_duration */
++ dword |= (write_odt_duration & 0x7) << 12;
++ dword &= ~(0x7 << 8); /* WrOdtTrnOnDly = write_odt_delay */
++ dword |= (write_odt_delay & 0x7) << 8;
++ dword &= ~(0xf << 4); /* RdOdtOnDuration = read_odt_duration */
++ dword |= (read_odt_duration & 0xf) << 4;
++ dword &= ~(0xf); /* RdOdtTrnOnDly = read_odt_delay */
++ dword |= (read_odt_delay & 0xf);
++ Set_NB32_DCT(dev, dct, 0x240, dword);
++ } else if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
+ if (pDCTstat->Speed == 3)
+ dword = 0x00000800;
+ else
+ dword = 0x00000000;
+ for (i=0; i < 2; i++) {
+- reg_off = 0x100 * i;
+- Set_NB32(dev, 0x98 + reg_off, 0x0D000030);
+- Set_NB32(dev, 0x9C + reg_off, dword);
+- Set_NB32(dev, 0x98 + reg_off, 0x4D040F30);
+-
+- /* FIXME
+- * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
+- * For now assume a maximum of 2 DIMMs per channel can be installed
+- */
+- uint8_t MaxDimmsInstallable = 2;
++ Set_NB32_DCT(dev, i, 0x98, 0x0D000030);
++ Set_NB32_DCT(dev, i, 0x9C, dword);
++ Set_NB32_DCT(dev, i, 0x98, 0x4D040F30);
+
+ /* Obtain number of DIMMs on channel */
+ uint8_t dimm_count = pDCTstat->MAdimms[i];
+@@ -3463,7 +5589,7 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat,
+ uint32_t odt_pattern_3;
+
+ /* Select appropriate ODT pattern for installed DIMMs
+- * Refer to the BKDG Rev. 3.62, page 120 onwards
++ * Refer to the Fam10h BKDG Rev. 3.62, page 120 onwards
+ */
+ if (pDCTstat->C_DCTPtr[i]->Status[DCT_STATUS_REGISTERED]) {
+ if (MaxDimmsInstallable == 2) {
+@@ -3574,10 +5700,10 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat,
+ }
+
+ /* Program ODT pattern */
+- Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x180, odt_pattern_1);
+- Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x181, odt_pattern_0);
+- Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x182, odt_pattern_3);
+- Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x183, odt_pattern_2);
++ Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x180, odt_pattern_1);
++ Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x181, odt_pattern_0);
++ Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x182, odt_pattern_3);
++ Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x183, odt_pattern_2);
+ }
+ }
+ }
+@@ -3585,34 +5711,32 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat,
+ static void mct_EnDllShutdownSR(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct, val;
+
+ /* Write 0000_07D0h to register F2x[1, 0]98_x4D0FE006 */
+ if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3)) {
+- Set_NB32(dev, 0x9C + reg_off, 0x1C);
+- Set_NB32(dev, 0x98 + reg_off, 0x4D0FE006);
+- Set_NB32(dev, 0x9C + reg_off, 0x13D);
+- Set_NB32(dev, 0x98 + reg_off, 0x4D0FE007);
++ Set_NB32_DCT(dev, dct, 0x9C, 0x1C);
++ Set_NB32_DCT(dev, dct, 0x98, 0x4D0FE006);
++ Set_NB32_DCT(dev, dct, 0x9C, 0x13D);
++ Set_NB32_DCT(dev, dct, 0x98, 0x4D0FE007);
+
+- val = Get_NB32(dev, 0x90 + reg_off);
++ val = Get_NB32_DCT(dev, dct, 0x90);
+ val &= ~(1 << 27/* DisDllShutdownSR */);
+- Set_NB32(dev, 0x90 + reg_off, val);
++ Set_NB32_DCT(dev, dct, 0x90, val);
+ }
+ }
+
+ static u32 mct_DisDllShutdownSR(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u32 DramConfigLo, u8 dct)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+
+ /* Write 0000_07D0h to register F2x[1, 0]98_x4D0FE006 */
+ if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3)) {
+- Set_NB32(dev, 0x9C + reg_off, 0x7D0);
+- Set_NB32(dev, 0x98 + reg_off, 0x4D0FE006);
+- Set_NB32(dev, 0x9C + reg_off, 0x190);
+- Set_NB32(dev, 0x98 + reg_off, 0x4D0FE007);
++ Set_NB32_DCT(dev, dct, 0x9C, 0x7D0);
++ Set_NB32_DCT(dev, dct, 0x98, 0x4D0FE006);
++ Set_NB32_DCT(dev, dct, 0x9C, 0x190);
++ Set_NB32_DCT(dev, dct, 0x98, 0x4D0FE007);
+
+ DramConfigLo |= /* DisDllShutdownSR */ 1 << 27;
+ }
+@@ -3704,52 +5828,61 @@ void ProgDramMRSReg_D(struct MCTStatStruc *pMCTstat,
+ DramMRS |= 1 << 23;
+ }
+ }
+- /*
+- DRAM MRS Register
+- DrvImpCtrl: drive impedance control.01b(34 ohm driver; Ron34 = Rzq/7)
+- */
+- DramMRS |= 1 << 2;
+- /* Dram nominal termination: */
+- byte = pDCTstat->MAdimms[dct];
+- if (!(pDCTstat->Status & (1 << SB_Registered))) {
+- DramMRS |= 1 << 7; /* 60 ohms */
+- if (byte & 2) {
+- if (pDCTstat->Speed < 6)
+- DramMRS |= 1 << 8; /* 40 ohms */
+- else
+- DramMRS |= 1 << 9; /* 30 ohms */
++
++ if (is_fam15h()) {
++ DramMRS |= (0x1 << 23); /* PchgPDModeSel = 1 */
++ } else {
++ /*
++ DRAM MRS Register
++ DrvImpCtrl: drive impedance control.01b(34 ohm driver; Ron34 = Rzq/7)
++ */
++ DramMRS |= 1 << 2;
++ /* Dram nominal termination: */
++ byte = pDCTstat->MAdimms[dct];
++ if (!(pDCTstat->Status & (1 << SB_Registered))) {
++ DramMRS |= 1 << 7; /* 60 ohms */
++ if (byte & 2) {
++ if (pDCTstat->Speed < 6)
++ DramMRS |= 1 << 8; /* 40 ohms */
++ else
++ DramMRS |= 1 << 9; /* 30 ohms */
++ }
+ }
+- }
+- /* Dram dynamic termination: Disable(1DIMM), 120ohm(>=2DIMM) */
+- if (!(pDCTstat->Status & (1 << SB_Registered))) {
+- if (byte >= 2) {
+- if (pDCTstat->Speed == 7)
+- DramMRS |= 1 << 10;
+- else
+- DramMRS |= 1 << 11;
++ /* Dram dynamic termination: Disable(1DIMM), 120ohm(>=2DIMM) */
++ if (!(pDCTstat->Status & (1 << SB_Registered))) {
++ if (byte >= 2) {
++ if (pDCTstat->Speed == 7)
++ DramMRS |= 1 << 10;
++ else
++ DramMRS |= 1 << 11;
++ }
++ } else {
++ DramMRS |= mct_DramTermDyn_RDimm(pMCTstat, pDCTstat, byte);
+ }
+- } else {
+- DramMRS |= mct_DramTermDyn_RDimm(pMCTstat, pDCTstat, byte);
++
++ /* Qoff=0, output buffers enabled */
++ /* Tcwl */
++ DramMRS |= (pDCTstat->Speed - 4) << 20;
++ /* ASR=1, auto self refresh */
++ /* SRT=0 */
++ DramMRS |= 1 << 18;
+ }
+
+ /* burst length control */
+ if (pDCTstat->Status & (1 << SB_128bitmode))
+ DramMRS |= 1 << 1;
+- /* Qoff=0, output buffers enabled */
+- /* Tcwl */
+- DramMRS |= (pDCTstat->Speed - 4) << 20;
+- /* ASR=1, auto self refresh */
+- /* SRT=0 */
+- DramMRS |= 1 << 18;
+-
+- dword = Get_NB32(pDCTstat->dev_dct, 0x100 * dct + 0x84);
+- dword &= ~0x00FC2F8F;
++
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x84);
++ if (is_fam15h())
++ dword &= ~0x00800003;
++ else
++ dword &= ~0x00fc2f8f;
+ dword |= DramMRS;
+- Set_NB32(pDCTstat->dev_dct, 0x100 * dct + 0x84, dword);
++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x84, dword);
+ }
+
+-void mct_SetDramConfigHi_D(struct DCTStatStruc *pDCTstat, u32 dct,
+- u32 DramConfigHi)
++void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u32 dct, u32 DramConfigHi)
+ {
+ /* Bug#15114: Comp. update interrupted by Freq. change can cause
+ * subsequent update to be invalid during any MemClk frequency change:
+@@ -3778,45 +5911,86 @@ void mct_SetDramConfigHi_D(struct DCTStatStruc *pDCTstat, u32 dct,
+ */
+
+ u32 dev = pDCTstat->dev_dct;
+- u32 index_reg = 0x98 + 0x100 * dct;
++ u32 index_reg = 0x98;
+ u32 index;
+
+- u32 val;
++ uint32_t dword;
++
++ if (is_fam15h()) {
++ /* Initial setup for frequency change
++ * 9C_x0000_0004 must be configured before MemClkFreqVal is set
++ */
+
+- index = 0x08;
+- val = Get_NB32_index_wait(dev, index_reg, index);
+- if (!(val & (1 << DisAutoComp)))
+- Set_NB32_index_wait(dev, index_reg, index, val | (1 << DisAutoComp));
++ /* Program D18F2x9C_x0D0F_E006_dct[1:0][PllLockTime] = 0x190 */
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006);
++ dword &= ~(0x0000ffff);
++ dword |= 0x00000190;
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006, dword);
+
+- mct_Wait(100);
++ dword = Get_NB32_DCT(dev, dct, 0x94);
++ dword &= ~(1 << MemClkFreqVal);
++ Set_NB32_DCT(dev, dct, 0x94, dword);
+
+- Set_NB32(dev, 0x94 + 0x100 * dct, DramConfigHi);
++ dword = DramConfigHi;
++ dword &= ~(1 << MemClkFreqVal);
++ Set_NB32_DCT(dev, dct, 0x94, dword);
++
++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, dct);
++ set_2t_configuration(pMCTstat, pDCTstat, dct);
++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, dct);
++ mct_PlatformSpec(pMCTstat, pDCTstat, dct);
++ } else {
++ index = 0x08;
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
++ if (!(dword & (1 << DisAutoComp)))
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, dword | (1 << DisAutoComp));
++
++ mct_Wait(100);
++ }
++
++ /* Program the DRAM Configuration High register */
++ Set_NB32_DCT(dev, dct, 0x94, DramConfigHi);
++
++ if (is_fam15h()) {
++ /* Wait until F2x[1, 0]94[FreqChgInProg]=0. */
++ do {
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94);
++ } while (dword & (1 << FreqChgInProg));
++
++ /* Program D18F2x9C_x0D0F_E006_dct[1:0][PllLockTime] = 0xf */
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006);
++ dword &= ~(0x0000ffff);
++ dword |= 0x0000000f;
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006, dword);
++ }
+ }
+
+ static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstatA)
+ {
+- u8 Node;
+- struct DCTStatStruc *pDCTstat;
++ if (!is_fam15h()) {
++ u8 Node;
++ struct DCTStatStruc *pDCTstat;
+
+- /* Errata 178
+- *
+- * Bug#15115: Uncertainty In The Sync Chain Leads To Setup Violations
+- * In TX FIFO
+- * Solution: BIOS should program DRAM Control Register[RdPtrInit] =
+- * 5h, (F2x[1, 0]78[3:0] = 5h).
+- * Silicon Status: Fixed In Rev B0
+- *
+- * Bug#15880: Determine validity of reset settings for DDR PHY timing.
+- * Solution: At least, set WrDqs fine delay to be 0 for DDR3 training.
+- */
+- for (Node = 0; Node < 8; Node++) {
+- pDCTstat = pDCTstatA + Node;
++ /* Errata 178
++ *
++ * Bug#15115: Uncertainty In The Sync Chain Leads To Setup Violations
++ * In TX FIFO
++ * Solution: BIOS should program DRAM Control Register[RdPtrInit] =
++ * 5h, (F2x[1, 0]78[3:0] = 5h).
++ * Silicon Status: Fixed In Rev B0
++ *
++ * Bug#15880: Determine validity of reset settings for DDR PHY timing.
++ * Solution: At least, set WrDqs fine delay to be 0 for DDR3 training.
++ */
++ for (Node = 0; Node < 8; Node++) {
++ pDCTstat = pDCTstatA + Node;
+
+- if (pDCTstat->NodePresent) {
+- mct_BeforeDQSTrainSamp(pDCTstat); /* only Bx */
+- mct_ResetDLL_D(pMCTstat, pDCTstat, 0);
+- mct_ResetDLL_D(pMCTstat, pDCTstat, 1);
++ if (pDCTstat->NodePresent) {
++ mct_BeforeDQSTrainSamp(pDCTstat); /* only Bx */
++ mct_ResetDLL_D(pMCTstat, pDCTstat, 0);
++ mct_ResetDLL_D(pMCTstat, pDCTstat, 1);
++ }
+ }
+ }
+ }
+@@ -3827,7 +6001,6 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat,
+ {
+ u8 Receiver;
+ u32 dev = pDCTstat->dev_dct;
+- u32 reg_off = 0x100 * dct;
+ u32 addr;
+ u32 lo, hi;
+ u8 wrap32dis = 0;
+@@ -3838,6 +6011,11 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat,
+ return;
+ }
+
++ /* Skip reset DLL for Family 15h */
++ if (is_fam15h()) {
++ return;
++ }
++
+ addr = HWCR;
+ _RDMSR(addr, &lo, &hi);
+ if(lo & (1<<17)) { /* save the old value */
+@@ -3857,11 +6035,11 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat,
+ mct_Read1LTestPattern_D(pMCTstat, pDCTstat, addr); /* cache fills */
+
+ /* Write 0000_8000h to register F2x[1,0]9C_xD080F0C */
+- Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00008000);
++ Set_NB32_index_wait_DCT(dev, dct, 0x98, 0xD080F0C, 0x00008000);
+ mct_Wait(80); /* wait >= 300ns */
+
+ /* Write 0000_0000h to register F2x[1,0]9C_xD080F0C */
+- Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00000000);
++ Set_NB32_index_wait_DCT(dev, dct, 0x98, 0xD080F0C, 0x00000000);
+ mct_Wait(800); /* wait >= 2us */
+ break;
+ }
+@@ -3901,39 +6079,39 @@ static void mct_EnableDatIntlv_D(struct MCTStatStruc *pMCTstat,
+ static void SetDllSpeedUp_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+- u32 val;
+- u32 dev = pDCTstat->dev_dct;
+- u32 reg_off = 0x100 * dct;
+-
+- if (pDCTstat->Speed >= 7) { /* DDR1600 and above */
+- /* Set bit13 PowerDown to register F2x[1, 0]98_x0D080F10 */
+- Set_NB32(dev, reg_off + 0x98, 0x0D080F10);
+- val = Get_NB32(dev, reg_off + 0x9C);
+- val |= 1 < 13;
+- Set_NB32(dev, reg_off + 0x9C, val);
+- Set_NB32(dev, reg_off + 0x98, 0x4D080F10);
+-
+- /* Set bit13 PowerDown to register F2x[1, 0]98_x0D080F11 */
+- Set_NB32(dev, reg_off + 0x98, 0x0D080F11);
+- val = Get_NB32(dev, reg_off + 0x9C);
+- val |= 1 < 13;
+- Set_NB32(dev, reg_off + 0x9C, val);
+- Set_NB32(dev, reg_off + 0x98, 0x4D080F11);
+-
+- /* Set bit13 PowerDown to register F2x[1, 0]98_x0D088F30 */
+- Set_NB32(dev, reg_off + 0x98, 0x0D088F30);
+- val = Get_NB32(dev, reg_off + 0x9C);
+- val |= 1 < 13;
+- Set_NB32(dev, reg_off + 0x9C, val);
+- Set_NB32(dev, reg_off + 0x98, 0x4D088F30);
+-
+- /* Set bit13 PowerDown to register F2x[1, 0]98_x0D08CF30 */
+- Set_NB32(dev, reg_off + 0x98, 0x0D08CF30);
+- val = Get_NB32(dev, reg_off + 0x9C);
+- val |= 1 < 13;
+- Set_NB32(dev, reg_off + 0x9C, val);
+- Set_NB32(dev, reg_off + 0x98, 0x4D08CF30);
+-
++ if (!is_fam15h()) {
++ u32 val;
++ u32 dev = pDCTstat->dev_dct;
++
++ if (pDCTstat->Speed >= mhz_to_memclk_config(800)) { /* DDR1600 and above */
++ /* Set bit13 PowerDown to register F2x[1, 0]98_x0D080F10 */
++ Set_NB32_DCT(dev, dct, 0x98, 0x0D080F10);
++ val = Get_NB32_DCT(dev, dct, 0x9C);
++			val |= 1 << 13;
++ Set_NB32_DCT(dev, dct, 0x9C, val);
++ Set_NB32_DCT(dev, dct, 0x98, 0x4D080F10);
++
++ /* Set bit13 PowerDown to register F2x[1, 0]98_x0D080F11 */
++ Set_NB32_DCT(dev, dct, 0x98, 0x0D080F11);
++ val = Get_NB32_DCT(dev, dct, 0x9C);
++			val |= 1 << 13;
++ Set_NB32_DCT(dev, dct, 0x9C, val);
++ Set_NB32_DCT(dev, dct, 0x98, 0x4D080F11);
++
++ /* Set bit13 PowerDown to register F2x[1, 0]98_x0D088F30 */
++ Set_NB32_DCT(dev, dct, 0x98, 0x0D088F30);
++ val = Get_NB32_DCT(dev, dct, 0x9C);
++			val |= 1 << 13;
++ Set_NB32_DCT(dev, dct, 0x9C, val);
++ Set_NB32_DCT(dev, dct, 0x98, 0x4D088F30);
++
++ /* Set bit13 PowerDown to register F2x[1, 0]98_x0D08CF30 */
++ Set_NB32_DCT(dev, dct, 0x98, 0x0D08CF30);
++ val = Get_NB32_DCT(dev, dct, 0x9C);
++			val |= 1 << 13;
++ Set_NB32_DCT(dev, dct, 0x9C, val);
++ Set_NB32_DCT(dev, dct, 0x98, 0x4D08CF30);
++ }
+ }
+ }
+
+@@ -3961,7 +6139,6 @@ static void SyncSetting(struct DCTStatStruc *pDCTstat)
+ static void AfterDramInit_D(struct DCTStatStruc *pDCTstat, u8 dct) {
+
+ u32 val;
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+
+ if (pDCTstat->LogicalCPUID & (AMD_DR_B2 | AMD_DR_B3)) {
+@@ -3969,16 +6146,16 @@ static void AfterDramInit_D(struct DCTStatStruc *pDCTstat, u8 dct) {
+ val = Get_NB32(dev, 0x110);
+ if (!(val & (1 << DramEnabled))) {
+ /* If 50 us expires while DramEnable =0 then do the following */
+- val = Get_NB32(dev, 0x90 + reg_off);
++ val = Get_NB32_DCT(dev, dct, 0x90);
+ val &= ~(1 << Width128); /* Program Width128 = 0 */
+- Set_NB32(dev, 0x90 + reg_off, val);
++ Set_NB32_DCT(dev, dct, 0x90, val);
+
+- val = Get_NB32_index_wait(dev, 0x98 + reg_off, 0x05); /* Perform dummy CSR read to F2x09C_x05 */
++ val = Get_NB32_index_wait_DCT(dev, dct, 0x98, 0x05); /* Perform dummy CSR read to F2x09C_x05 */
+
+ if (pDCTstat->GangedMode) {
+- val = Get_NB32(dev, 0x90 + reg_off);
++ val = Get_NB32_DCT(dev, dct, 0x90);
+ val |= 1 << Width128; /* Program Width128 = 0 */
+- Set_NB32(dev, 0x90 + reg_off, val);
++ Set_NB32_DCT(dev, dct, 0x90, val);
+ }
+ }
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+index a947c2d..50fbff7 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+@@ -76,6 +76,8 @@
+ /* #define PA_EXT_DCTADDL (((00 << 3)+5) << 8) */ /*Node x DCT function, Additional Registers PCI Address bits [15:0]*/
+
+ #define PA_NBMISC(Node) ((((0x18+Node) << 3)+3) << 12) /*Node 0 Misc PCI Address bits [15:0]*/
++#define PA_LINK(Node) ((((0x18+Node) << 3)+4) << 12) /*Node 0 Link Control bits [15:0]*/
++#define PA_NBCTL(Node) ((((0x18+Node) << 3)+5) << 12) /*Node 0 NB Control PCI Address bits [15:0]*/
+ /* #define PA_NBDEVOP (((00 << 3)+3) << 8) */ /*Node 0 Misc PCI Address bits [15:0]*/
+
+ #define DCC_EN 1 /* X:2:0x94[19]*/
+@@ -129,7 +131,7 @@
+ #define X4Dimm 12 /* func 2, offset 90h, bit 12*/
+ #define UnBuffDimm 16 /* func 2, offset 90h, bit 16*/
+ #define DimmEcEn 19 /* func 2, offset 90h, bit 19*/
+-#define MemClkFreqVal 3 /* func 2, offset 94h, bit 3*/
++#define MemClkFreqVal ((is_fam15h())?7:3) /* func 2, offset 94h, bit 3 or 7*/
+ #define RDqsEn 12 /* func 2, offset 94h, bit 12*/
+ #define DisDramInterface 14 /* func 2, offset 94h, bit 14*/
+ #define PowerDownEn 15 /* func 2, offset 94h, bit 15*/
+@@ -204,6 +206,7 @@
+ #define JED_PROBEMSK 0x40 /*Analysis Probe installed*/
+ #define JED_RDIMM 0x1 /* RDIMM */
+ #define JED_MiniRDIMM 0x5 /* Mini-RDIMM */
++ #define JED_LRDIMM 0xb /* Load-reduced DIMM */
+ #define SPD_Density 4 /* Bank address bits,SDRAM capacity */
+ #define SPD_Addressing 5 /* Row/Column address bits */
+ #define SPD_Voltage 6 /* Supported voltage bitfield */
+@@ -297,6 +300,7 @@ struct MCTStatStruc {
+ of sub 4GB dram hole for HW remapping.*/
+ u32 Sub4GCacheTop; /* If not zero, the 32-bit top of cacheable memory.*/
+ u32 SysLimit; /* LIMIT[39:8] (system address)*/
++ uint32_t TSCFreq;
+ } __attribute__((packed));
+
+ /*=============================================================================
+@@ -320,7 +324,8 @@ struct MCTStatStruc {
+
+ struct DCTStatStruc { /* A per Node structure*/
+ /* DCTStatStruct_F - start */
+- u8 Node_ID; /* Node ID of current controller*/
++ u8 Node_ID; /* Node ID of current controller */
++ uint8_t stopDCT; /* Set if the DCT will be stopped */
+ u8 ErrCode; /* Current error condition of Node
+ 0= no error
+ 1= Variance Error, DCT is running but not in an optimal configuration.
+@@ -464,7 +469,7 @@ struct DCTStatStruc { /* A per Node structure*/
+ /* CH A byte lane 0 - 7 maximum filtered window passing DQS delay value*/
+ /* CH B byte lane 0 - 7 minimum filtered window passing DQS delay value*/
+ /* CH B byte lane 0 - 7 maximum filtered window passing DQS delay value*/
+- u32 LogicalCPUID; /* The logical CPUID of the node*/
++ uint64_t LogicalCPUID; /* The logical CPUID of the node*/
+ u16 HostBiosSrvc1; /* Word sized general purpose field for use by host BIOS. Scratch space.*/
+ u32 HostBiosSrvc2; /* Dword sized general purpose field for use by host BIOS. Scratch space.*/
+ u16 DimmQRPresent; /* QuadRank DIMM present?*/
+@@ -558,12 +563,20 @@ struct DCTStatStruc { /* A per Node structure*/
+ u8 ClToNB_flag; /* is used to restore ClLinesToNbDis bit after memory */
+ u32 NodeSysBase; /* for channel interleave usage */
+
++ /* Fam15h specific backup variables */
++ uint8_t SwNbPstateLoDis;
++ uint8_t NbPstateDisOnP0;
++ uint8_t NbPstateThreshold;
++ uint8_t NbPstateHi;
++
+ /* New for LB Support */
+ u8 NodePresent;
+ u32 dev_host;
+ u32 dev_map;
+ u32 dev_dct;
+ u32 dev_nbmisc;
++ u32 dev_link;
++ u32 dev_nbctl;
+ u8 TargetFreq;
+ u8 TargetCASL;
+ u8 CtrlWrd3;
+@@ -596,9 +609,10 @@ struct DCTStatStruc { /* A per Node structure*/
+ uint8_t DimmBanks[MAX_DIMMS_SUPPORTED];
+ uint8_t DimmWidth[MAX_DIMMS_SUPPORTED];
+ uint8_t DimmRegistered[MAX_DIMMS_SUPPORTED];
++ uint8_t DimmLoadReduced[MAX_DIMMS_SUPPORTED];
+
+ uint64_t DimmManufacturerID[MAX_DIMMS_SUPPORTED];
+- char DimmPartNumber[MAX_DIMMS_SUPPORTED][SPD_PARTN_LENGTH];
++ char DimmPartNumber[MAX_DIMMS_SUPPORTED][SPD_PARTN_LENGTH+1];
+ uint16_t DimmRevisionNumber[MAX_DIMMS_SUPPORTED];
+ uint32_t DimmSerialNumber[MAX_DIMMS_SUPPORTED];
+ } __attribute__((packed));
+@@ -701,7 +715,64 @@ struct amd_s3_persistent_mct_channel_data {
+ /* Other (1 dword) */
+ uint32_t f3x58;
+
+- /* TOTAL: 250 dwords */
++ /* Family 15h-specific registers (90 dwords) */
++ uint32_t f2x200;
++ uint32_t f2x204;
++ uint32_t f2x208;
++ uint32_t f2x20c;
++ uint32_t f2x210[4]; /* [nb pstate] */
++ uint32_t f2x214;
++ uint32_t f2x218;
++ uint32_t f2x21c;
++ uint32_t f2x22c;
++ uint32_t f2x230;
++ uint32_t f2x234;
++ uint32_t f2x238;
++ uint32_t f2x23c;
++ uint32_t f2x240;
++ uint32_t f2x9cx0d0fe003;
++ uint32_t f2x9cx0d0fe013;
++ uint32_t f2x9cx0d0f0_8_0_1f[9]; /* [lane]*/
++ uint32_t f2x9cx0d0f201f;
++ uint32_t f2x9cx0d0f211f;
++ uint32_t f2x9cx0d0f221f;
++ uint32_t f2x9cx0d0f801f;
++ uint32_t f2x9cx0d0f811f;
++ uint32_t f2x9cx0d0f821f;
++ uint32_t f2x9cx0d0fc01f;
++ uint32_t f2x9cx0d0fc11f;
++ uint32_t f2x9cx0d0fc21f;
++ uint32_t f2x9cx0d0f4009;
++ uint32_t f2x9cx0d0f0_8_0_02[9]; /* [lane]*/
++ uint32_t f2x9cx0d0f0_8_0_06[9]; /* [lane]*/
++ uint32_t f2x9cx0d0f0_8_0_0a[9]; /* [lane]*/
++ uint32_t f2x9cx0d0f2002;
++ uint32_t f2x9cx0d0f2102;
++ uint32_t f2x9cx0d0f2202;
++ uint32_t f2x9cx0d0f8002;
++ uint32_t f2x9cx0d0f8006;
++ uint32_t f2x9cx0d0f800a;
++ uint32_t f2x9cx0d0f8102;
++ uint32_t f2x9cx0d0f8106;
++ uint32_t f2x9cx0d0f810a;
++ uint32_t f2x9cx0d0fc002;
++ uint32_t f2x9cx0d0fc006;
++ uint32_t f2x9cx0d0fc00a;
++ uint32_t f2x9cx0d0fc00e;
++ uint32_t f2x9cx0d0fc012;
++ uint32_t f2x9cx0d0f2031;
++ uint32_t f2x9cx0d0f2131;
++ uint32_t f2x9cx0d0f2231;
++ uint32_t f2x9cx0d0f8031;
++ uint32_t f2x9cx0d0f8131;
++ uint32_t f2x9cx0d0f8231;
++ uint32_t f2x9cx0d0fc031;
++ uint32_t f2x9cx0d0fc131;
++ uint32_t f2x9cx0d0fc231;
++ uint32_t f2x9cx0d0f0_0_f_31[9]; /* [lane] */
++ uint32_t f2x9cx0d0f8021;
++
++ /* TOTAL: 340 dwords */
+ } __attribute__((packed));
+
+ struct amd_s3_persistent_node_data {
+@@ -746,18 +817,19 @@ struct amd_s3_persistent_data {
+ Local Configuration Status (DCTStatStruc.Status[31:0])
+ ===============================================================================*/
+ #define SB_Registered 0 /* All DIMMs are Registered*/
+-#define SB_ECCDIMMs 1 /* All banks ECC capable*/
+-#define SB_PARDIMMs 2 /* All banks Addr/CMD Parity capable*/
+-#define SB_DiagClks 3 /* Jedec ALL slots clock enable diag mode*/
+-#define SB_128bitmode 4 /* DCT in 128-bit mode operation*/
+-#define SB_64MuxedMode 5 /* DCT in 64-bit mux'ed mode.*/
+-#define SB_2TMode 6 /* 2T CMD timing mode is enabled.*/
+-#define SB_SWNodeHole 7 /* Remapping of Node Base on this Node to create a gap.*/
+-#define SB_HWHole 8 /* Memory Hole created on this Node using HW remapping.*/
+-#define SB_Over400MHz 9 /* DCT freq >= 400MHz flag*/
+-#define SB_DQSPos_Pass2 10 /* Using for TrainDQSPos DIMM0/1, when freq>=400MHz*/
+-#define SB_DQSRcvLimit 11 /* Using for DQSRcvEnTrain to know we have reached to upper bound.*/
+-#define SB_ExtConfig 12 /* Indicator the default setting for extend PCI configuration support*/
++#define SB_LoadReduced 1 /* All DIMMs are Load-Reduced*/
++#define SB_ECCDIMMs 2 /* All banks ECC capable*/
++#define SB_PARDIMMs 3 /* All banks Addr/CMD Parity capable*/
++#define SB_DiagClks 4 /* Jedec ALL slots clock enable diag mode*/
++#define SB_128bitmode 5 /* DCT in 128-bit mode operation*/
++#define SB_64MuxedMode 6 /* DCT in 64-bit mux'ed mode.*/
++#define SB_2TMode 7 /* 2T CMD timing mode is enabled.*/
++#define SB_SWNodeHole 8 /* Remapping of Node Base on this Node to create a gap.*/
++#define SB_HWHole 9 /* Memory Hole created on this Node using HW remapping.*/
++#define SB_Over400MHz 10 /* DCT freq >= 400MHz flag*/
++#define SB_DQSPos_Pass2 11 /* Using for TrainDQSPos DIMM0/1, when freq>=400MHz*/
++#define SB_DQSRcvLimit 12 /* Using for DQSRcvEnTrain to know we have reached to upper bound.*/
++#define SB_ExtConfig 13 /* Indicator the default setting for extend PCI configuration support*/
+
+
+ /*===============================================================================
+@@ -775,17 +847,18 @@ struct amd_s3_persistent_data {
+ 266=266MHz (DDR533)
+ 333=333MHz (DDR667)
+ 400=400MHz (DDR800)*/
+-#define NV_ECC_CAP 4 /* Bus ECC capable (1-bits)
++#define NV_MIN_MEMCLK 4 /* Minimum platform demonstrated Memclock (10-bits) */
++#define NV_ECC_CAP 5 /* Bus ECC capable (1-bits)
+ 0=Platform not capable
+ 1=Platform is capable*/
+-#define NV_4RANKType 5 /* Quad Rank DIMM slot type (2-bits)
++#define NV_4RANKType 6 /* Quad Rank DIMM slot type (2-bits)
+ 0=Normal
+ 1=R4 (4-Rank Registered DIMMs in AMD server configuration)
+ 2=S4 (Unbuffered SO-DIMMs)*/
+-#define NV_BYPMAX 6 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition).
++#define NV_BYPMAX 7 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition).
+ 4=4 times bypass (normal for non-UMA systems)
+ 7=7 times bypass (normal for UMA systems)*/
+-#define NV_RDWRQBYP 7 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition).
++#define NV_RDWRQBYP 8 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition).
+ 2=8 times (normal for non-UMA systems)
+ 3=16 times (normal for UMA systems)*/
+
+@@ -848,8 +921,9 @@ struct amd_s3_persistent_data {
+ #define NV_ECCRedir 54 /* Dram ECC Redirection enable*/
+ #define NV_DramBKScrub 55 /* Dram ECC Background Scrubber CTL*/
+ #define NV_L2BKScrub 56 /* L2 ECC Background Scrubber CTL*/
+-#define NV_DCBKScrub 57 /* DCache ECC Background Scrubber CTL*/
+-#define NV_CS_SpareCTL 58 /* Chip Select Spare Control bit 0:
++#define NV_L3BKScrub 57 /* L3 ECC Background Scrubber CTL*/
++#define NV_DCBKScrub 58 /* DCache ECC Background Scrubber CTL*/
++#define NV_CS_SpareCTL 59 /* Chip Select Spare Control bit 0:
+ 0=disable Spare
+ 1=enable Spare */
+ /* Chip Select Spare Control bit 1-4:
+@@ -900,10 +974,12 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u8 FinalVa
+ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel);
+ void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct);
+ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
+-void mct_SetDramConfigHi_D(struct DCTStatStruc *pDCTstat, u32 dct, u32 DramConfigHi);
++void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct, u32 DramConfigHi);
+ void mct_DramInit_Hw_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
+ void mct_SetClToNB_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+ void mct_SetWbEnhWsbDis_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
++void mct_ForceNBPState0_En_Fam15(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
++void mct_ForceNBPState0_Dis_Fam15(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+ void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Pass);
+ void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 _DisableDramECC);
+ u32 procOdtWorkaround(struct DCTStatStruc *pDCTstat, u32 dct, u32 val);
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
+index c40ea1a..f6aa755 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
+@@ -98,6 +98,15 @@ static u32 bsf(u32 x)
+
+ u32 SetUpperFSbase(u32 addr_hi);
+
++static void proc_MFENCE(void)
++{
++ __asm__ volatile (
++ "outb %%al, $0xed\n\t" /* _EXECFENCE */
++ "mfence\n\t"
++ :::"memory"
++ );
++}
++
+ static void proc_CLFLUSH(u32 addr_hi)
+ {
+ SetUpperFSbase(addr_hi);
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c b/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c
+index 126642b..3df262b 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -17,6 +18,8 @@
+ * Foundation, Inc.
+ */
+
++/* AM3/ASB2/C32/G34 DDR3 */
++
+ static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload,
+ u32 *AddrTmgCTL, u32 *ODC_CTL,
+ u8 *CMDmode);
+@@ -24,17 +27,23 @@ static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload,
+ void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u32 dct)
+ {
+- Get_ChannelPS_Cfg0_D(pDCTstat->MAdimms[dct], pDCTstat->Speed,
+- pDCTstat->MAload[dct],
+- &(pDCTstat->CH_ADDR_TMG[dct]), &(pDCTstat->CH_ODC_CTL[dct]),
+- &pDCTstat->_2Tmode);
++ if (is_fam15h()) {
++ pDCTstat->CH_ADDR_TMG[dct] = fam15h_address_timing_compensation_code(pDCTstat, dct);
++ pDCTstat->CH_ODC_CTL[dct] = fam15h_output_driver_compensation_code(pDCTstat, dct);
++ pDCTstat->_2Tmode = fam15h_slow_access_mode(pDCTstat, dct);
++ } else {
++ Get_ChannelPS_Cfg0_D(pDCTstat->MAdimms[dct], pDCTstat->Speed,
++ pDCTstat->MAload[dct],
++ &(pDCTstat->CH_ADDR_TMG[dct]), &(pDCTstat->CH_ODC_CTL[dct]),
++ &pDCTstat->_2Tmode);
++
++ pDCTstat->CH_ODC_CTL[dct] |= 0x20000000; /* 60ohms */
++ }
+
+ pDCTstat->CH_EccDQSLike[0] = 0x0403;
+ pDCTstat->CH_EccDQSScale[0] = 0x70;
+ pDCTstat->CH_EccDQSLike[1] = 0x0403;
+ pDCTstat->CH_EccDQSScale[1] = 0x70;
+-
+- pDCTstat->CH_ODC_CTL[dct] |= 0x20000000; /* 60ohms */
+ }
+
+ /*
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c
+index f1fd7a5..a1cdfa6 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -35,7 +36,6 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat,
+
+ u32 dev;
+ u32 reg;
+- u32 reg_off;
+ u32 val;
+ u32 val_lo, val_hi;
+
+@@ -44,16 +44,15 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat,
+ EnChipSels = 0;
+
+ dev = pDCTstat->dev_dct;
+- reg_off = 0x100 * dct;
+
+ ChipSel = 0; /* Find out if current configuration is capable */
+ while (DoIntlv && (ChipSel < MAX_CS_SUPPORTED)) {
+- reg = 0x40+(ChipSel<<2) + reg_off; /* Dram CS Base 0 */
+- val = Get_NB32(dev, reg);
++ reg = 0x40+(ChipSel<<2); /* Dram CS Base 0 */
++ val = Get_NB32_DCT(dev, dct, reg);
+ if ( val & (1<<CSEnable)) {
+ EnChipSels++;
+- reg = 0x60+((ChipSel>>1)<<2)+reg_off; /*Dram CS Mask 0 */
+- val = Get_NB32(dev, reg);
++ reg = 0x60+((ChipSel>>1)<<2); /*Dram CS Mask 0 */
++ val = Get_NB32_DCT(dev, dct, reg);
+ val >>= 19;
+ val &= 0x3ff;
+ val++;
+@@ -63,8 +62,8 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat,
+ /*If mask sizes not same then skip */
+ if (val != MemSize)
+ break;
+- reg = 0x80 + reg_off; /*Dram Bank Addressing */
+- val = Get_NB32(dev, reg);
++ reg = 0x80; /*Dram Bank Addressing */
++ val = Get_NB32_DCT(dev, dct, reg);
+ val >>= (ChipSel>>1)<<2;
+ val &= 0x0f;
+ if(EnChipSels == 1)
+@@ -103,8 +102,8 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat,
+ BitDelta = bsf(AddrHiMask) - bsf(AddrLoMask);
+
+ for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel++) {
+- reg = 0x40+(ChipSel<<2) + reg_off; /*Dram CS Base 0 */
+- val = Get_NB32(dev, reg);
++ reg = 0x40+(ChipSel<<2); /*Dram CS Base 0 */
++ val = Get_NB32_DCT(dev, dct, reg);
+ if (val & 3) {
+ val_lo = val & AddrLoMask;
+ val_hi = val & AddrHiMask;
+@@ -114,13 +113,13 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat,
+ val_hi >>= BitDelta;
+ val |= val_lo;
+ val |= val_hi;
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, dct, reg, val);
+
+ if(ChipSel & 1)
+ continue;
+
+- reg = 0x60 + ((ChipSel>>1)<<2) + reg_off; /*Dram CS Mask 0 */
+- val = Get_NB32(dev, reg);
++ reg = 0x60 + ((ChipSel>>1)<<2); /*Dram CS Mask 0 */
++ val = Get_NB32_DCT(dev, dct, reg);
+ val_lo = val & AddrLoMask;
+ val_hi = val & AddrHiMask;
+ val &= AddrLoMaskN;
+@@ -129,7 +128,7 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat,
+ val_hi >>= BitDelta;
+ val |= val_lo;
+ val |= val_hi;
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, dct, reg, val);
+ }
+ }
+ } /* DoIntlv */
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+index cc2f43a..740edae 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+@@ -18,6 +18,12 @@
+ * Foundation, Inc.
+ */
+
++static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay,
++ uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg);
++
++static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay,
++ uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg);
++
+ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u16 like,
+ u8 scale, u8 ChipSel);
+@@ -37,7 +43,7 @@ static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
+ u32 addr_lo);
+ static void SetTargetWTIO_D(u32 TestAddr);
+ static void ResetTargetWTIO_D(void);
+-void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index);
++void ResetDCTWrPtr_D(u32 dev, uint8_t dct, u32 index_reg, u32 index);
+ u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
+@@ -54,6 +60,7 @@ static void proc_IOCLFLUSH_D(u32 addr_hi);
+ static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel);
+
+ #define DQS_TRAIN_DEBUG 0
++// #define PRINT_PASS_FAIL_BITMAPS 1
+
+ static void print_debug_dqs(const char *str, u32 val, u8 level)
+ {
+@@ -198,18 +205,20 @@ void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat = pDCTstatA + Node;
+
+ if (pDCTstat->DCTSysLimit) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x78);
+- val |= 1 <<DqsRcvEnTrain;
+- Set_NB32(pDCTstat->dev_dct, 0x78, val);
+- val = Get_NB32(pDCTstat->dev_dct, 0x78 + 0x100);
+- val |= 1 <<DqsRcvEnTrain;
+- Set_NB32(pDCTstat->dev_dct, 0x78 + 0x100, val);
++ if (!is_fam15h()) {
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x78);
++ val |= 1 <<DqsRcvEnTrain;
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x78, val);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x78);
++ val |= 1 <<DqsRcvEnTrain;
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x78, val);
++ }
+ mct_TrainRcvrEn_D(pMCTstat, pDCTstat, Pass);
+ }
+ }
+ }
+
+-static void SetEccDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
++static void SetEccDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 ChipSel)
+ {
+ u8 channel;
+@@ -268,68 +277,150 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat->DQSDelay = (u8)DQSDelay;
+ }
+
+-static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
++static void read_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
++{
++ uint32_t dword;
++ uint32_t mask;
++
++ if (is_fam15h())
++ mask = 0xff;
++ else
++ mask = 0x7f;
++
++ /* Lanes 0 - 3 */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x1 | (dimm << 8));
++ delay[3] = (dword >> 24) & mask;
++ delay[2] = (dword >> 16) & mask;
++ delay[1] = (dword >> 8) & mask;
++ delay[0] = dword & mask;
++
++ /* Lanes 4 - 7 */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x2 | (dimm << 8));
++ delay[7] = (dword >> 24) & mask;
++ delay[6] = (dword >> 16) & mask;
++ delay[5] = (dword >> 8) & mask;
++ delay[4] = dword & mask;
++
++ /* Lane 8 (ECC) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x3 | (dimm << 8));
++ delay[8] = dword & mask;
++}
++
++static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
+ {
+ uint32_t dword;
++ uint32_t mask;
++
++ if (is_fam15h())
++ mask = 0xff;
++ else
++ mask = 0x7f;
+
+ /* Lanes 0 - 3 */
+- dword = Get_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8));
+- dword &= ~0x7f7f7f7f;
+- dword |= (delay[3] & 0x7f) << 24;
+- dword |= (delay[2] & 0x7f) << 16;
+- dword |= (delay[1] & 0x7f) << 8;
+- dword |= delay[0] & 0x7f;
+- Set_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8), dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x1 | (dimm << 8));
++ dword &= ~(mask << 24);
++ dword &= ~(mask << 16);
++ dword &= ~(mask << 8);
++ dword &= ~mask;
++ dword |= (delay[3] & mask) << 24;
++ dword |= (delay[2] & mask) << 16;
++ dword |= (delay[1] & mask) << 8;
++ dword |= delay[0] & mask;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x1 | (dimm << 8), dword);
+
+ /* Lanes 4 - 7 */
+- dword = Get_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8));
+- dword &= ~0x7f7f7f7f;
+- dword |= (delay[7] & 0x7f) << 24;
+- dword |= (delay[6] & 0x7f) << 16;
+- dword |= (delay[5] & 0x7f) << 8;
+- dword |= delay[4] & 0x7f;
+- Set_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8), dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x2 | (dimm << 8));
++ dword &= ~(mask << 24);
++ dword &= ~(mask << 16);
++ dword &= ~(mask << 8);
++ dword &= ~mask;
++ dword |= (delay[7] & mask) << 24;
++ dword |= (delay[6] & mask) << 16;
++ dword |= (delay[5] & mask) << 8;
++ dword |= delay[4] & mask;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x2 | (dimm << 8), dword);
+
+ /* Lane 8 (ECC) */
+- dword = Get_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8));
+- dword &= ~0x0000007f;
+- dword |= delay[8] & 0x7f;
+- Set_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8), dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x3 | (dimm << 8));
++ dword &= ~mask;
++ dword |= delay[8] & mask;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x3 | (dimm << 8), dword);
+ }
+
+-static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
++static void read_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
+ {
+ uint32_t dword;
++ uint32_t mask;
++
++ if (is_fam15h())
++ mask = 0x3e;
++ else
++ mask = 0x3f;
+
+ /* Lanes 0 - 3 */
+- dword = Get_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8));
+- dword &= ~0x3f3f3f3f;
+- dword |= (delay[3] & 0x3f) << 24;
+- dword |= (delay[2] & 0x3f) << 16;
+- dword |= (delay[1] & 0x3f) << 8;
+- dword |= delay[0] & 0x3f;
+- Set_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8), dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x5 | (dimm << 8));
++ delay[3] = (dword >> 24) & mask;
++ delay[2] = (dword >> 16) & mask;
++ delay[1] = (dword >> 8) & mask;
++ delay[0] = dword & mask;
+
+ /* Lanes 4 - 7 */
+- dword = Get_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8));
+- dword &= ~0x3f3f3f3f;
+- dword |= (delay[7] & 0x3f) << 24;
+- dword |= (delay[6] & 0x3f) << 16;
+- dword |= (delay[5] & 0x3f) << 8;
+- dword |= delay[4] & 0x3f;
+- Set_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8), dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x6 | (dimm << 8));
++ delay[7] = (dword >> 24) & mask;
++ delay[6] = (dword >> 16) & mask;
++ delay[5] = (dword >> 8) & mask;
++ delay[4] = dword & mask;
+
+ /* Lane 8 (ECC) */
+- dword = Get_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8));
+- dword &= ~0x0000003f;
+- dword |= delay[8] & 0x3f;
+- Set_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8), dword);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x7 | (dimm << 8));
++ delay[8] = dword & mask;
++}
++
++static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
++{
++ uint32_t dword;
++ uint32_t mask;
++
++ if (is_fam15h())
++ mask = 0x3e;
++ else
++ mask = 0x3f;
++
++ /* Lanes 0 - 3 */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x5 | (dimm << 8));
++ dword &= ~(mask << 24);
++ dword &= ~(mask << 16);
++ dword &= ~(mask << 8);
++ dword &= ~mask;
++ dword |= (delay[3] & mask) << 24;
++ dword |= (delay[2] & mask) << 16;
++ dword |= (delay[1] & mask) << 8;
++ dword |= delay[0] & mask;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x5 | (dimm << 8), dword);
++
++ /* Lanes 4 - 7 */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x6 | (dimm << 8));
++ dword &= ~(mask << 24);
++ dword &= ~(mask << 16);
++ dword &= ~(mask << 8);
++ dword &= ~mask;
++ dword |= (delay[7] & mask) << 24;
++ dword |= (delay[6] & mask) << 16;
++ dword |= (delay[5] & mask) << 8;
++ dword |= delay[4] & mask;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x6 | (dimm << 8), dword);
++
++ /* Lane 8 (ECC) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x7 | (dimm << 8));
++ dword &= ~mask;
++ dword |= delay[8] & mask;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x7 | (dimm << 8), dword);
+ }
+
+ /* DQS Position Training
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.3
+ */
+-static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
++static void TrainDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat)
+ {
+ u32 Errors;
+@@ -406,7 +497,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ if (pDCTstat->DIMMValidDCT[Channel] == 0) /* mct_BeforeTrainDQSRdWrPos_D */
+ continue;
+
+- index_reg = 0x98 + 0x100 * Channel;
++ index_reg = 0x98;
+
+ dual_rank = 0;
+ Receiver = mct_InitReceiver_D(pDCTstat, Channel);
+@@ -462,7 +553,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ break;
+
+ /* Commit the current Write Data Timing settings to the hardware registers */
+- write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, Channel, (Receiver >> 1), index_reg);
+
+ /* Write the DRAM training pattern to the base test address */
+ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+@@ -479,7 +570,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ current_read_dqs_delay[lane] = test_read_dqs_delay;
+
+ /* Commit the current Read DQS Timing Control settings to the hardware registers */
+- write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, Channel, (Receiver >> 1), index_reg);
+
+ /* Initialize test result variable */
+ bytelane_test_results = 0xff;
+@@ -545,7 +636,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ passing_dqs_delay_found[lane] = 1;
+
+ /* Commit the current Read DQS Timing Control settings to the hardware registers */
+- write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, Channel, (Receiver >> 1), index_reg);
+
+ /* Exit the DRAM Write Data Timing Loop */
+ write_dqs_delay_stepping_done[lane] = 1;
+@@ -579,7 +670,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ current_write_dqs_delay[lane] = test_write_dqs_delay;
+
+ /* Commit the current Write Data Timing settings to the hardware registers */
+- write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, Channel, (Receiver >> 1), index_reg);
+
+ /* Write the DRAM training pattern to the base test address */
+ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+@@ -674,7 +765,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ current_read_dqs_delay[lane] = (best_pos + (best_count / 2));
+
+ /* Commit the current Read DQS Timing Control settings to the hardware registers */
+- write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, Channel, (Receiver >> 1), index_reg);
+
+ /* Save the final Read DQS Timing Control settings for later use */
+ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane];
+@@ -717,7 +808,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ current_write_dqs_delay[lane] = (best_pos + (best_count / 2));
+
+ /* Commit the current Write Data Timing settings to the hardware registers */
+- write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, Channel, (Receiver >> 1), index_reg);
+
+ /* Save the final Write Data Timing settings for later use */
+ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane];
+@@ -787,6 +878,831 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ printk(BIOS_DEBUG, "TrainDQSRdWrPos: Done\n\n");
+ }
+
++/* Calculate and set MaxRdLatency
++ * Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.8.5
++ */
++static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, uint8_t dct)
++{
++ uint8_t dimm;
++ uint8_t lane;
++ uint32_t dword;
++ uint32_t dword2;
++ uint32_t max_delay;
++ uint8_t mem_clk = 0;
++ uint8_t nb_pstate;
++ uint32_t nb_clk;
++ uint32_t p = 0;
++ uint32_t n = 0;
++ uint32_t t = 0;
++ uint16_t current_phy_phase_delay[MAX_BYTE_LANES];
++ uint16_t current_read_dqs_delay[MAX_BYTE_LANES];
++
++ uint32_t index_reg = 0x98;
++ uint32_t dev = pDCTstat->dev_dct;
++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
++
++ /* P is specified in PhyCLKs (1/2 MEMCLKs) */
++ for (nb_pstate = 0; nb_pstate < 2; nb_pstate++) {
++ /* 2.10.5.8.5 (2) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004);
++ if ((!(dword & (0x1 << 21))) && (!(dword & (0x1 << 13))) && (!(dword & (0x1 << 5))))
++ p += 1;
++ else
++ p += 2;
++
++ /* 2.10.5.8.5 (3) */
++ dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210) & 0xf; /* Retrieve RdPtrInit */
++ p += (9 - dword);
++
++ /* 2.10.5.8.5 (4) */
++ p += 5;
++
++ /* 2.10.5.8.5 (5) */
++ dword = Get_NB32_DCT(dev, dct, 0xa8);
++ dword2 = Get_NB32_DCT(dev, dct, 0x90);
++ if ((!(dword & (0x1 << 5))) && (!(dword2 & (0x1 << 16))))
++ p += 2;
++
++ /* 2.10.5.8.5 (6) */
++ dword = Get_NB32_DCT(dev, dct, 0x200) & 0x1f; /* Retrieve Tcl */
++ p += (2 * (dword - 1));
++
++ /* 2.10.5.8.5 (7) */
++ max_delay = 0;
++ for (dimm = 0; dimm < 4; dimm++) {
++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, dimm * 2))
++ continue;
++
++ read_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg);
++ read_read_dqs_timing_control_registers(current_read_dqs_delay, dev, dct, dimm, index_reg);
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++)
++ if ((current_phy_phase_delay[lane] + current_read_dqs_delay[lane]) > max_delay)
++ max_delay = (current_phy_phase_delay[lane] + current_read_dqs_delay[lane]);
++ }
++ p += (max_delay >> 5);
++
++ /* 2.10.5.8.5 (8) */
++ p += 5;
++
++ /* 2.10.5.8.5 (9) */
++ t += 800;
++
++ /* 2.10.5.8.5 (10) */
++ mem_clk = Get_NB32_DCT(dev, dct, 0x94) & 0x1f;
++ dword = Get_NB32(pDCTstat->dev_nbctl, (0x160 + (nb_pstate * 4))); /* Retrieve NbDid, NbFid */
++ nb_clk = (200 * (((dword >> 1) & 0x1f) + 0x4)) / (((dword >> 7) & 0x1)?2:1);
++ n = (((((uint64_t)p * 1000000000000ULL)/(((uint64_t)fam15h_freq_tab[mem_clk] * 1000000ULL) * 2)) + ((uint64_t)t)) * ((uint64_t)nb_clk * 1000)) / 1000000000ULL;
++
++ /* 2.10.5.8.5 (11) */
++ n -= 1;
++
++ /* 2.10.5.8.5 (12) */
++ dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210);
++ dword &= ~(0x3ff << 22);
++ dword |= (((n - 1) & 0x3ff) << 22);
++ Set_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210, dword);
++
++ /* Save result for later use */
++ pDCTstat->CH_MaxRdLat[dct] = n;
++ }
++}
++
++static void start_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver)
++{
++ uint32_t dword;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ /* 2.10.5.7.1.1
++ * It appears that the DCT only supports 8-beat burst length mode,
++ * so do nothing here...
++ */
++
++ /* Wait for CmdSendInProg == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ } while (dword & (0x1 << 12));
++
++ /* Set CmdTestEnable = 1 */
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword |= (0x1 << 2);
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++
++ /* 2.10.5.8.6.1.1 Send Activate Command (Target A) */
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */
++ dword |= ((0x1 << Receiver) << 22);
++ dword &= ~(0x7 << 19); /* CmdBank = 0 */
++ dword &= ~(0x3ffff); /* CmdAddress = 0 */
++ dword |= (0x1 << 31); /* SendActCmd = 1 */
++ Set_NB32_DCT(dev, dct, 0x28c, dword);
++
++ /* Wait for SendActCmd == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ } while (dword & (0x1 << 31));
++
++ /* Wait 75 MEMCLKs. */
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 75);
++
++ /* 2.10.5.8.6.1.1 Send Activate Command (Target B) */
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */
++ dword |= ((0x1 << Receiver) << 22);
++ dword &= ~(0x7 << 19); /* CmdBank = 1 */
++ dword |= (0x1 << 19);
++ dword &= ~(0x3ffff); /* CmdAddress = 0 */
++ dword |= (0x1 << 31); /* SendActCmd = 1 */
++ Set_NB32_DCT(dev, dct, 0x28c, dword);
++
++ /* Wait for SendActCmd == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ } while (dword & (0x1 << 31));
++
++ /* Wait 75 MEMCLKs. */
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 75);
++}
++
++static void stop_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver)
++{
++ uint32_t dword;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ /* 2.10.5.8.6.1.1 Send Precharge Command */
++ /* Wait 25 MEMCLKs. */
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25);
++
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */
++ dword |= ((0x1 << Receiver) << 22);
++ dword &= ~(0x7 << 19); /* CmdBank = 0 */
++ dword &= ~(0x3ffff); /* CmdAddress = 0x400 */
++ dword |= 0x400;
++ dword |= (0x1 << 30); /* SendPchgCmd = 1 */
++ Set_NB32_DCT(dev, dct, 0x28c, dword);
++
++ /* Wait for SendPchgCmd == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ } while (dword & (0x1 << 30));
++
++ /* Wait 25 MEMCLKs. */
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25);
++
++ /* Set CmdTestEnable = 0 */
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword &= ~(0x1 << 2);
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++}
++
++static void read_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver, uint8_t lane)
++{
++ uint32_t dword;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ start_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver);
++
++ /* 2.10.5.8.6.1.2 */
++ /* Configure DQMask */
++ if (lane < 4) {
++ Set_NB32_DCT(dev, dct, 0x274, ~(0xff << (lane * 8)));
++ Set_NB32_DCT(dev, dct, 0x278, ~0x0);
++ } else if (lane < 8) {
++ Set_NB32_DCT(dev, dct, 0x274, ~0x0);
++ Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8)));
++ } else {
++ Set_NB32_DCT(dev, dct, 0x274, ~0x0);
++ Set_NB32_DCT(dev, dct, 0x278, ~0x0);
++ }
++
++ dword = Get_NB32_DCT(dev, dct, 0x27c);
++ dword &= ~(0xff); /* EccMask = 0 */
++ if ((lane != 8) || (pDCTstat->DimmECCPresent == 0))
++ dword |= 0xff; /* EccMask = 0xff */
++ Set_NB32_DCT(dev, dct, 0x27c, dword);
++
++ dword = Get_NB32_DCT(dev, dct, 0x270);
++ dword &= ~(0x7ffff); /* DataPrbsSeed = 55555 */
++// dword |= (0x55555);
++ dword |= (0x44443); /* Use AGESA seed */
++ Set_NB32_DCT(dev, dct, 0x270, dword);
++
++ /* 2.10.5.8.4 */
++ dword = Get_NB32_DCT(dev, dct, 0x260);
++ dword &= ~(0x1fffff); /* CmdCount = 256 */
++ dword |= 256;
++ Set_NB32_DCT(dev, dct, 0x260, dword);
++
++ /* Configure Target A */
++ dword = Get_NB32_DCT(dev, dct, 0x254);
++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */
++ dword |= (Receiver & 0x7) << 24;
++ dword &= ~(0x7 << 21); /* TgtBank = 0 */
++ dword &= ~(0x3ff); /* TgtAddress = 0 */
++ Set_NB32_DCT(dev, dct, 0x254, dword);
++
++ /* Configure Target B */
++ dword = Get_NB32_DCT(dev, dct, 0x258);
++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */
++ dword |= (Receiver & 0x7) << 24;
++ dword &= ~(0x7 << 21); /* TgtBank = 1 */
++ dword |= (0x1 << 21);
++ dword &= ~(0x3ff); /* TgtAddress = 0 */
++ Set_NB32_DCT(dev, dct, 0x258, dword);
++
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword |= (0x1 << 3); /* ResetAllErr = 1 */
++ dword &= ~(0x1 << 4); /* StopOnErr = 0 */
++ dword &= ~(0x3 << 8); /* CmdTgt = 1 (Alternate between Target A and Target B) */
++ dword |= (0x1 << 8);
++ dword &= ~(0x7 << 5); /* CmdType = 0 (Read) */
++ dword |= (0x1 << 11); /* SendCmd = 1 */
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++
++ /* 2.10.5.8.6.1.2 Wait for TestStatus == 1 and CmdSendInProg == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ } while ((dword & (0x1 << 12)) || (!(dword & (0x1 << 10))));
++
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword &= ~(0x1 << 11); /* SendCmd = 0 */
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++
++ stop_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver);
++}
++
++static void write_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver, uint8_t lane)
++{
++ uint32_t dword;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ start_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver);
++
++ /* 2.10.5.8.6.1.2 */
++ /* Configure DQMask */
++ if (lane < 4) {
++ Set_NB32_DCT(dev, dct, 0x274, ~(0xff << (lane * 8)));
++ Set_NB32_DCT(dev, dct, 0x278, ~0x0);
++ } else if (lane < 8) {
++ Set_NB32_DCT(dev, dct, 0x274, ~0x0);
++ Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8)));
++ } else {
++ Set_NB32_DCT(dev, dct, 0x274, ~0x0);
++ Set_NB32_DCT(dev, dct, 0x278, ~0x0);
++ }
++
++ dword = Get_NB32_DCT(dev, dct, 0x27c);
++ dword &= ~(0xff); /* EccMask = 0 */
++ if ((lane != 8) || (pDCTstat->DimmECCPresent == 0))
++ dword |= 0xff; /* EccMask = 0xff */
++ Set_NB32_DCT(dev, dct, 0x27c, dword);
++
++ dword = Get_NB32_DCT(dev, dct, 0x270);
++ dword &= ~(0x7ffff); /* DataPrbsSeed = 55555 */
++// dword |= (0x55555);
++ dword |= (0x44443); /* Use AGESA seed */
++ Set_NB32_DCT(dev, dct, 0x270, dword);
++
++ /* 2.10.5.8.4 */
++ dword = Get_NB32_DCT(dev, dct, 0x260);
++ dword &= ~(0x1fffff); /* CmdCount = 256 */
++ dword |= 256;
++ Set_NB32_DCT(dev, dct, 0x260, dword);
++
++ /* Configure Target A */
++ dword = Get_NB32_DCT(dev, dct, 0x254);
++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */
++ dword |= (Receiver & 0x7) << 24;
++ dword &= ~(0x7 << 21); /* TgtBank = 0 */
++ dword &= ~(0x3ff); /* TgtAddress = 0 */
++ Set_NB32_DCT(dev, dct, 0x254, dword);
++
++ /* Configure Target B */
++ dword = Get_NB32_DCT(dev, dct, 0x258);
++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */
++ dword |= (Receiver & 0x7) << 24;
++ dword &= ~(0x7 << 21); /* TgtBank = 1 */
++ dword |= (0x1 << 21);
++ dword &= ~(0x3ff); /* TgtAddress = 0 */
++ Set_NB32_DCT(dev, dct, 0x258, dword);
++
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword |= (0x1 << 3); /* ResetAllErr = 1 */
++ dword &= ~(0x1 << 4); /* StopOnErr = 0 */
++ dword &= ~(0x3 << 8); /* CmdTgt = 1 (Alternate between Target A and Target B) */
++ dword |= (0x1 << 8);
++ dword &= ~(0x7 << 5); /* CmdType = 1 (Write) */
++ dword |= (0x1 << 5);
++ dword |= (0x1 << 11); /* SendCmd = 1 */
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++
++ /* 2.10.5.8.6.1.2 Wait for TestStatus == 1 and CmdSendInProg == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ } while ((dword & (0x1 << 12)) || (!(dword & (0x1 << 10))));
++
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword &= ~(0x1 << 11); /* SendCmd = 0 */
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++
++ stop_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver);
++}
++
++/* DQS Position Training
++ * Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.8.4
++ */
++static uint8_t TrainDQSRdWrPos_D_Fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t receiver_start, uint8_t receiver_end, uint8_t lane_start, uint8_t lane_end)
++{
++ uint8_t dimm;
++ uint8_t lane;
++ uint32_t dword;
++ uint32_t Errors;
++ uint8_t Receiver;
++ uint8_t dual_rank;
++ uint8_t write_iter;
++ uint8_t read_iter;
++ uint16_t initial_write_dqs_delay[MAX_BYTE_LANES];
++ uint16_t initial_read_dqs_delay[MAX_BYTE_LANES];
++ uint16_t initial_write_data_timing[MAX_BYTE_LANES];
++ uint16_t current_write_data_delay[MAX_BYTE_LANES];
++ uint16_t current_read_dqs_delay[MAX_BYTE_LANES];
++ uint16_t current_write_dqs_delay[MAX_BYTE_LANES];
++ uint8_t passing_dqs_delay_found[MAX_BYTE_LANES];
++ uint8_t dqs_results_array[2][(lane_end - lane_start)][32][32]; /* [rank][lane][write step][read step] */
++
++ uint8_t last_pos = 0;
++ uint8_t cur_count = 0;
++ uint8_t best_pos = 0;
++ uint8_t best_count = 0;
++
++ uint32_t index_reg = 0x98;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ /* Calculate and program MaxRdLatency */
++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct);
++
++ Errors = 0;
++ dual_rank = 0;
++ Receiver = mct_InitReceiver_D(pDCTstat, dct);
++ if (receiver_start > Receiver)
++ Receiver = receiver_start;
++
++ /* There are four receiver pairs, loosely associated with chipselects.
++ * This is essentially looping over each DIMM.
++ */
++ for (; Receiver < receiver_end; Receiver += 2) {
++ dimm = (Receiver >> 1);
++ if ((Receiver & 0x1) == 0) {
++ /* Even rank of DIMM */
++ if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, Receiver+1))
++ dual_rank = 1;
++ else
++ dual_rank = 0;
++ }
++
++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, Receiver)) {
++ continue;
++ }
++
++ /* Initialize variables */
++ for (lane = lane_start; lane < lane_end; lane++) {
++ passing_dqs_delay_found[lane] = 0;
++ }
++ memset(dqs_results_array, 0, sizeof(dqs_results_array));
++
++ /* Read initial read / write DQS delays */
++ read_dqs_write_timing_control_registers(initial_write_dqs_delay, dev, dct, dimm, index_reg);
++ read_dqs_read_data_timing_registers(initial_read_dqs_delay, dev, dct, dimm, index_reg);
++
++ /* Read current settings of other (previously trained) lanes */
++ read_dqs_write_data_timing_registers(initial_write_data_timing, dev, dct, dimm, index_reg);
++ memcpy(current_write_data_delay, initial_write_data_timing, sizeof(current_write_data_delay));
++
++ for (lane = lane_start; lane < lane_end; lane++) {
++ /* 2.10.5.8.4 (2)
++ * For each Write Data Delay value from Write DQS Delay to Write DQS Delay + 1 UI
++ */
++ for (current_write_data_delay[lane] = initial_write_dqs_delay[lane]; current_write_data_delay[lane] < (initial_write_dqs_delay[lane] + 0x20); current_write_data_delay[lane]++) {
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 16 current_write_data_delay[lane] ", current_write_data_delay[lane], 6);
++
++ /* 2.10.5.8.4 (2 A)
++ * Commit the current Write Data Timing settings to the hardware registers
++ */
++ write_dqs_write_data_timing_registers(current_write_data_delay, dev, dct, dimm, index_reg);
++
++ /* 2.10.5.8.4 (2 B)
++ * Write the DRAM training pattern to the test address
++ */
++ write_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver, lane);
++
++ /* Read current settings of other (previously trained) lanes */
++ read_dqs_read_data_timing_registers(current_read_dqs_delay, dev, dct, dimm, index_reg);
++
++ /* 2.10.5.8.4 (2 C)
++ * For each Read DQS Delay value from 0 to 1 UI
++ */
++ for (current_read_dqs_delay[lane] = 0; current_read_dqs_delay[lane] < 0x40; current_read_dqs_delay[lane] += 2) {
++ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 161 current_read_dqs_delay[lane] ", current_read_dqs_delay[lane], 6);
++
++ /* 2.10.5.8.4 (2 A i)
++ * Commit the current Read DQS Timing Control settings to the hardware registers
++ */
++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, dct, dimm, index_reg);
++
++ /* 2.10.5.8.4 (2 A ii)
++ * Read the DRAM training pattern from the test address
++ */
++ read_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver, lane);
++
++ /* 2.10.5.8.4 (2 A iii)
++ * Record pass / fail status
++ */
++ dword = Get_NB32_DCT(dev, dct, 0x268) & 0x3ffff;
++ if (dword & (0x3 << (lane * 2)))
++ dqs_results_array[Receiver & 0x1][lane - lane_start][current_write_data_delay[lane] - initial_write_dqs_delay[lane]][current_read_dqs_delay[lane] >> 1] = 0; /* Fail */
++ else
++ dqs_results_array[Receiver & 0x1][lane - lane_start][current_write_data_delay[lane] - initial_write_dqs_delay[lane]][current_read_dqs_delay[lane] >> 1] = 1; /* Pass */
++ }
++ }
++
++ if (dual_rank && (Receiver & 0x1)) {
++ /* Overlay the previous rank test results with the current rank */
++ for (write_iter = 0; write_iter < 32; write_iter++) {
++ for (read_iter = 0; read_iter < 32; read_iter++) {
++ if ((dqs_results_array[0][lane - lane_start][write_iter][read_iter])
++ && (dqs_results_array[1][lane - lane_start][write_iter][read_iter]))
++ dqs_results_array[1][lane - lane_start][write_iter][read_iter] = 1;
++ else
++ dqs_results_array[1][lane - lane_start][write_iter][read_iter] = 0;
++ }
++ }
++ }
++
++ /* Determine location and length of longest consecutive string of read passing values
++ * Output is stored in best_pos and best_count
++ */
++ last_pos = 0;
++ cur_count = 0;
++ best_pos = 0;
++ best_count = 0;
++ for (write_iter = 0; write_iter < 32; write_iter++) {
++ for (read_iter = 0; read_iter < 32; read_iter++) {
++ if ((dqs_results_array[Receiver & 0x1][lane - lane_start][write_iter][read_iter]) && (read_iter < 31)) {
++ /* Pass */
++ cur_count++;
++ } else {
++ /* Failure or end of loop */
++ if (cur_count > best_count) {
++ best_count = cur_count;
++ best_pos = last_pos;
++ }
++ cur_count = 0;
++ last_pos = read_iter;
++ }
++ }
++ last_pos = 0;
++ }
++
++ if (best_count > 2) {
++ /* Restore current settings of other (previously trained) lanes to the active array */
++ memcpy(current_read_dqs_delay, initial_read_dqs_delay, sizeof(current_read_dqs_delay));
++
++ /* Program the Read DQS Timing Control register with the center of the passing window */
++ current_read_dqs_delay[lane] = ((best_pos << 1) + ((best_count << 1) / 2));
++ passing_dqs_delay_found[lane] = 1;
++
++ /* Commit the current Read DQS Timing Control settings to the hardware registers */
++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, dct, dimm, index_reg);
++
++ /* Save the final Read DQS Timing Control settings for later use */
++ pDCTstat->CH_D_DIR_B_DQS[dct][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane];
++
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 142 largest read passing region ", best_count, 4);
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 143 largest read passing region start ", best_pos, 4);
++ } else {
++ /* Reprogram the Read DQS Timing Control register with the original settings */
++ write_dqs_read_data_timing_registers(initial_read_dqs_delay, dev, dct, dimm, index_reg);
++ }
++
++ /* Determine location and length of longest consecutive string of write passing values
++ * Output is stored in best_pos and best_count
++ */
++ last_pos = 0;
++ cur_count = 0;
++ best_pos = 0;
++ best_count = 0;
++ for (read_iter = 0; read_iter < 32; read_iter++) {
++ for (write_iter = 0; write_iter < 32; write_iter++) {
++ if ((dqs_results_array[Receiver & 0x1][lane - lane_start][write_iter][read_iter]) && (write_iter < 31)) {
++ /* Pass */
++ cur_count++;
++ } else {
++ /* Failure or end of loop */
++ if (cur_count > best_count) {
++ best_count = cur_count;
++ best_pos = last_pos;
++ }
++ cur_count = 0;
++ last_pos = write_iter;
++ }
++ }
++ last_pos = 0;
++ }
++
++ if (best_count > 2) {
++ /* Restore current settings of other (previously trained) lanes to the active array */
++ memcpy(current_write_dqs_delay, initial_write_data_timing, sizeof(current_write_data_delay));
++
++ /* Program the Write DQS Timing Control register with the optimal region within the passing window */
++ if (pDCTstat->Status & (1 << SB_LoadReduced))
++ current_write_dqs_delay[lane] = ((best_pos + initial_write_dqs_delay[lane]) + (best_count / 3));
++ else
++ current_write_dqs_delay[lane] = ((best_pos + initial_write_dqs_delay[lane]) + (best_count / 2));
++ passing_dqs_delay_found[lane] = 1;
++
++ /* Commit the current Write DQS Timing Control settings to the hardware registers */
++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, dct, dimm, index_reg);
++
++ /* Save the final Write Data Timing settings for later use */
++ pDCTstat->CH_D_DIR_B_DQS[dct][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane];
++
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 144 largest write passing region ", best_count, 4);
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 145 largest write passing region start ", best_pos, 4);
++ } else {
++ /* Reprogram the Write DQS Timing Control register with the original settings */
++				write_dqs_write_data_timing_registers(initial_write_data_timing, dev, dct, dimm, index_reg);
++ }
++ }
++
++#ifdef PRINT_PASS_FAIL_BITMAPS
++ for (lane = lane_start; lane < lane_end; lane++) {
++ for (read_iter = 0; read_iter < 32; read_iter++) {
++ for (write_iter = 0; write_iter < 32; write_iter++) {
++ if (dqs_results_array[Receiver & 0x1][lane - lane_start][write_iter][read_iter])
++ printk(BIOS_DEBUG, "+");
++ else
++ printk(BIOS_DEBUG, ".");
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ printk(BIOS_DEBUG, "\n\n");
++ }
++#endif
++
++ /* Flag failure(s) if present */
++ for (lane = lane_start; lane < lane_end; lane++) {
++ if (!passing_dqs_delay_found[lane]) {
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 121 Unable to find passing region for lane ", lane, 2);
++
++ /* Flag absence of passing window */
++ Errors |= 1 << SB_NODQSPOS;
++ }
++ }
++
++ pDCTstat->TrainErrors |= Errors;
++ pDCTstat->ErrStatus |= Errors;
++
++#if DQS_TRAIN_DEBUG > 0
++ {
++ u8 val;
++ u8 i;
++ u8 ChannelDTD, ReceiverDTD, Dir;
++ u8 *p;
++
++ for (Dir = 0; Dir < 2; Dir++) {
++ if (Dir == 1) {
++ printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS WR:\n");
++ } else {
++ printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n");
++ }
++ for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
++ printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD);
++ for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) {
++ printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD);
++ p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir];
++ for (i=0;i<8; i++) {
++ val = p[i];
++ printk(BIOS_DEBUG, " %02x", val);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ }
++ }
++
++ }
++#endif
++ }
++
++ /* Return 1 on success, 0 on failure */
++ return !Errors;
++}
++
++/* DQS Receiver Enable Cycle Training
++ * Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.8.3
++ */
++static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat)
++{
++ u32 Errors;
++ u8 Receiver;
++ u8 _DisableDramECC = 0;
++ u8 _Wrap32Dis = 0, _SSE2 = 0;
++
++ u32 addr;
++ u32 cr4;
++ u32 lo, hi;
++
++ uint8_t dct;
++ uint8_t prev;
++ uint8_t dimm;
++ uint8_t lane;
++ uint32_t dword;
++ uint32_t rx_en_offset;
++ uint16_t initial_phy_phase_delay[MAX_BYTE_LANES];
++ uint16_t current_phy_phase_delay[MAX_BYTE_LANES];
++ uint8_t dqs_results_array[1024];
++
++ uint16_t ren_step = 0x40;
++ uint32_t index_reg = 0x98;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ print_debug_dqs("\nTrainDQSReceiverEnCyc: Node_ID ", pDCTstat->Node_ID, 0);
++ cr4 = read_cr4();
++ if (cr4 & (1<<9)) {
++ _SSE2 = 1;
++ }
++ cr4 |= (1<<9); /* OSFXSR enable SSE2 */
++ write_cr4(cr4);
++
++ addr = HWCR;
++ _RDMSR(addr, &lo, &hi);
++ if (lo & (1<<17)) {
++ _Wrap32Dis = 1;
++ }
++ lo |= (1<<17); /* HWCR.wrap32dis */
++ _WRMSR(addr, lo, hi); /* allow 64-bit memory references in real mode */
++
++ /* Disable ECC correction of reads on the dram bus. */
++ _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
++
++ Errors = 0;
++
++ for (dct = 0; dct < 2; dct++) {
++ /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp, DisablePredriverCal] */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003);
++ dword &= ~(0x3 << 13);
++ dword |= (0x1 << 13);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003, dword);
++ }
++
++ for (dct = 0; dct < 2; dct++) {
++ /* 2.10.5.6 */
++ fam15EnableTrainingMode(pMCTstat, pDCTstat, dct, 1);
++
++ /* 2.10.5.8.3 */
++ Receiver = mct_InitReceiver_D(pDCTstat, dct);
++
++ /* There are four receiver pairs, loosely associated with chipselects.
++ * This is essentially looping over each DIMM.
++ */
++ for (; Receiver < 8; Receiver += 2) {
++ dimm = (Receiver >> 1);
++
++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, Receiver)) {
++ continue;
++ }
++
++ /* 2.10.5.8.3 (2) */
++ read_dqs_receiver_enable_control_registers(initial_phy_phase_delay, dev, dct, dimm, index_reg);
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ /* Initialize variables */
++ memset(dqs_results_array, 0, sizeof(dqs_results_array));
++
++ /* 2.10.5.8.3 (1) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0030 | (lane << 8));
++ dword |= (0x1 << 8); /* BlockRxDqsLock = 1 */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0030 | (lane << 8), dword);
++
++ /* 2.10.5.8.3 (3) */
++ rx_en_offset = (initial_phy_phase_delay[lane] + 0x10) % 0x40;
++
++ /* 2.10.5.8.3 (4) */
++ for (current_phy_phase_delay[lane] = rx_en_offset; current_phy_phase_delay[lane] < 0x3ff; current_phy_phase_delay[lane] += ren_step) {
++ /* 2.10.5.8.3 (4 A) */
++ write_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg);
++
++ /* Calculate and program MaxRdLatency */
++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct);
++
++ /* 2.10.5.8.3 (4 B) */
++ dqs_results_array[current_phy_phase_delay[lane]] = TrainDQSRdWrPos_D_Fam15(pMCTstat, pDCTstat, dct, Receiver, Receiver + 2, lane, lane + 1);
++ }
++
++#ifdef PRINT_PASS_FAIL_BITMAPS
++ uint16_t iter;
++ for (iter = 0; iter < 0x3ff; iter++) {
++ if (dqs_results_array[iter])
++ printk(BIOS_DEBUG, "+");
++ else
++ printk(BIOS_DEBUG, ".");
++ }
++ printk(BIOS_DEBUG, "\n");
++#endif
++
++ /* 2.10.5.8.3 (5) */
++ prev = 0;
++ for (current_phy_phase_delay[lane] = rx_en_offset; current_phy_phase_delay[lane] < 0x3ff; current_phy_phase_delay[lane] += ren_step) {
++ if ((dqs_results_array[current_phy_phase_delay[lane]] == 0) && (prev == 1)) {
++ /* Restore last known good delay */
++ current_phy_phase_delay[lane] -= ren_step;
++
++ /* 2.10.5.8.3 (5 A B) */
++ current_phy_phase_delay[lane] -= 0x10;
++
++ /* Update hardware registers with final values */
++ write_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg);
++ break;
++ }
++ prev = dqs_results_array[current_phy_phase_delay[lane]];
++ }
++
++ /* 2.10.5.8.3 (6) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0030 | (lane << 8));
++ dword &= ~(0x1 << 8); /* BlockRxDqsLock = 0 */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0030 | (lane << 8), dword);
++ }
++
++#if DQS_TRAIN_DEBUG > 0
++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc_D_Fam15 DQS receiver enable timing: ");
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ printk(BIOS_DEBUG, " %03x", current_phy_phase_delay[lane]);
++ }
++ printk(BIOS_DEBUG, "\n");
++#endif
++ }
++ }
++
++ pDCTstat->TrainErrors |= Errors;
++ pDCTstat->ErrStatus |= Errors;
++
++#if DQS_TRAIN_DEBUG > 0
++ {
++ u8 val;
++ u8 i;
++ u8 ChannelDTD, ReceiverDTD, Dir;
++ u8 *p;
++
++ for (Dir = 0; Dir < 2; Dir++) {
++ if (Dir == 1) {
++ printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS WR:\n");
++ } else {
++ printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n");
++ }
++ for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
++ printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD);
++ for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) {
++ printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD);
++ p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir];
++ for (i=0;i<8; i++) {
++ val = p[i];
++ printk(BIOS_DEBUG, " %02x", val);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ }
++ }
++
++ }
++#endif
++ if (_DisableDramECC) {
++ mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
++ }
++ if (!_Wrap32Dis) {
++ addr = HWCR;
++ _RDMSR(addr, &lo, &hi);
++ lo &= ~(1<<17); /* restore HWCR.wrap32dis */
++ _WRMSR(addr, lo, hi);
++ }
++	if (!_SSE2) {
++ cr4 = read_cr4();
++ cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
++ write_cr4(cr4);
++ }
++
++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: Status %x\n", pDCTstat->Status);
++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: TrainErrors %x\n", pDCTstat->TrainErrors);
++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: ErrStatus %x\n", pDCTstat->ErrStatus);
++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: ErrCode %x\n", pDCTstat->ErrCode);
++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: Done\n\n");
++}
++
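The register plumbing above can obscure the actual search: for every byte lane the code sweeps the receiver enable delay in ren_step increments, records a pass/fail result per setting in dqs_results_array, and then backs off from the first pass-to-fail transition. A minimal standalone sketch of that search, assuming a caller-supplied pass/fail array indexed by delay exactly like dqs_results_array above (names are illustrative, not part of the patch):

#include <stdint.h>

/* Illustrative restatement of the delay search in TrainDQSReceiverEnCyc_D_Fam15.
 * results[d] is nonzero when training passed at delay setting d.
 */
static uint16_t find_rx_en_delay(const uint8_t results[1024],
				 uint16_t initial_delay, uint16_t step)
{
	/* Start 0x10 past the seed, wrapped into one 0x40-wide window (step 3 above) */
	uint16_t offset = (initial_delay + 0x10) % 0x40;
	uint16_t delay;
	uint8_t prev = 0;

	/* Stop at the first pass-to-fail transition (step 5 above) */
	for (delay = offset; delay < 0x3ff; delay += step) {
		if ((results[delay] == 0) && (prev == 1)) {
			delay -= step;	/* restore last known good setting */
			delay -= 0x10;	/* remove the offset added above (step 5 A B) */
			return delay;
		}
		prev = results[delay];
	}

	/* No falling edge found: the code above leaves the last swept value programmed */
	return delay - step;
}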
+ static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u32 *buffer)
+ {
+@@ -869,18 +1785,17 @@ static u8 ChipSelPresent_D(struct MCTStatStruc *pMCTstat,
+ u32 val;
+ u32 reg;
+ u32 dev = pDCTstat->dev_dct;
+- u32 reg_off;
++ uint8_t dct = 0;
+ u8 ret = 0;
+
+- if (!pDCTstat->GangedMode) {
+- reg_off = 0x100 * Channel;
+- } else {
+- reg_off = 0;
+- }
++ if (!pDCTstat->GangedMode)
++ dct = Channel;
++ else
++ dct = 0;
+
+ if (ChipSel < MAX_CS_SUPPORTED){
+- reg = 0x40 + (ChipSel << 2) + reg_off;
+- val = Get_NB32(dev, reg);
++ reg = 0x40 + (ChipSel << 2);
++ val = Get_NB32_DCT(dev, dct, reg);
+ if (val & ( 1 << 0))
+ ret = 1;
+ }
+@@ -1085,12 +2000,12 @@ u32 SetUpperFSbase(u32 addr_hi)
+ return addr_hi << 8;
+ }
+
+-void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index)
++void ResetDCTWrPtr_D(u32 dev, uint8_t dct, u32 index_reg, u32 index)
+ {
+ u32 val;
+
+- val = Get_NB32_index_wait(dev, index_reg, index);
+- Set_NB32_index_wait(dev, index_reg, index, val);
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val);
+ }
+
+ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
+@@ -1103,9 +2018,13 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
+ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+ pDCTstat = pDCTstatA + Node;
+ if (pDCTstat->DCTSysLimit) {
+- TrainDQSRdWrPos_D(pMCTstat, pDCTstat);
+- for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
+- SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
++ if (is_fam15h()) {
++ TrainDQSReceiverEnCyc_D_Fam15(pMCTstat, pDCTstat);
++ } else {
++ TrainDQSRdWrPos_D_Fam10(pMCTstat, pDCTstat);
++ for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
++ SetEccDQSRdWrPos_D_Fam10(pMCTstat, pDCTstat, ChipSel);
++ }
+ }
+ }
+ }
+@@ -1126,19 +2045,18 @@ u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
+
+ dev = pDCTstat->dev_dct;
+ reg = 0x90;
+- val = Get_NB32(dev, reg);
++ val = Get_NB32_DCT(dev, 0, reg);
+ if (val & (1<<DimmEcEn)) {
+ _DisableDramECC |= 0x01;
+ val &= ~(1<<DimmEcEn);
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, 0, reg, val);
+ }
+ if (!pDCTstat->GangedMode) {
+- reg = 0x190;
+- val = Get_NB32(dev, reg);
++ val = Get_NB32_DCT(dev, 1, reg);
+ if (val & (1<<DimmEcEn)) {
+ _DisableDramECC |= 0x02;
+ val &= ~(1<<DimmEcEn);
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, 1, reg, val);
+ }
+ }
+ return _DisableDramECC;
+@@ -1157,15 +2075,14 @@ void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
+
+ if ((_DisableDramECC & 0x01) == 0x01) {
+ reg = 0x90;
+- val = Get_NB32(dev, reg);
++ val = Get_NB32_DCT(dev, 0, reg);
+ val |= (1<<DimmEcEn);
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, 0, reg, val);
+ }
+ if ((_DisableDramECC & 0x02) == 0x02) {
+- reg = 0x190;
+- val = Get_NB32(dev, reg);
++ val = Get_NB32_DCT(dev, 1, reg);
+ val |= (1<<DimmEcEn);
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, 1, reg, val);
+ }
+ }
+
+@@ -1177,7 +2094,7 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
+ {
+ u8 ByteLane;
+ u32 val;
+- u32 index_reg = 0x98 + 0x100 * pDCTstat->Channel;
++ u32 index_reg = 0x98;
+ u8 shift;
+ u32 dqs_delay = (u32)pDCTstat->DQSDelay;
+ u32 dev = pDCTstat->dev_dct;
+@@ -1205,7 +2122,7 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
+
+ index += (ChipSel>>1) << 8;
+
+- val = Get_NB32_index_wait(dev, index_reg, index);
++ val = Get_NB32_index_wait_DCT(dev, pDCTstat->Channel, index_reg, index);
+ if (ByteLane < 8) {
+ if (pDCTstat->Direction == DQS_WRITEDIR) {
+ dqs_delay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][ChipSel>>1][ByteLane];
+@@ -1215,7 +2132,7 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
+ }
+ val &= ~(0x7f << shift);
+ val |= (dqs_delay << shift);
+- Set_NB32_index_wait(dev, index_reg, index, val);
++ Set_NB32_index_wait_DCT(dev, pDCTstat->Channel, index_reg, index, val);
+ }
+ }
+
+@@ -1241,7 +2158,7 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
+ u8 Channel, u8 receiver, u8 *valid)
+ {
+ u32 val;
+- u32 reg_off = 0;
++ uint8_t dct = 0;
+ u32 reg;
+ u32 dword;
+ u32 dev = pDCTstat->dev_dct;
+@@ -1250,12 +2167,12 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
+
+
+ if (!pDCTstat->GangedMode) {
+- reg_off = 0x100 * Channel;
++ dct = Channel;
+ }
+
+ /* get the local base addr of the chipselect */
+- reg = 0x40 + (receiver << 2) + reg_off;
+- val = Get_NB32(dev, reg);
++ reg = 0x40 + (receiver << 2);
++ val = Get_NB32_DCT(dev, dct, reg);
+
+ val &= ~0xe007c01f;
+
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c
+index 0c52791..11f1b2c 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -91,19 +92,21 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA)
+
+ /* Construct these booleans, based on setup options, for easy handling
+ later in this procedure */
+- OB_NBECC = mctGet_NVbits(NV_NBECC); /* MCA ECC (MCE) enable bit */
++ OB_NBECC = mctGet_NVbits(NV_NBECC); /* MCA ECC (MCE) enable bit */
+
+- OB_ECCRedir = mctGet_NVbits(NV_ECCRedir); /* ECC Redirection */
++ OB_ECCRedir = mctGet_NVbits(NV_ECCRedir); /* ECC Redirection */
+
+- OB_ChipKill = mctGet_NVbits(NV_ChipKill); /* ECC Chip-kill mode */
++ OB_ChipKill = mctGet_NVbits(NV_ChipKill); /* ECC Chip-kill mode */
++ OF_ScrubCTL = 0; /* Scrub CTL for Dcache, L2, and dram */
+
+- OF_ScrubCTL = 0; /* Scrub CTL for Dcache, L2, and dram */
+- nvbits = mctGet_NVbits(NV_DCBKScrub);
+- /* mct_AdjustScrub_D(pDCTstatA, &nvbits); */ /* Need not adjust */
+- OF_ScrubCTL |= (u32) nvbits << 16;
++ if (!is_fam15h()) {
++ nvbits = mctGet_NVbits(NV_DCBKScrub);
++ /* mct_AdjustScrub_D(pDCTstatA, &nvbits); */ /* Need not adjust */
++ OF_ScrubCTL |= (u32) nvbits << 16;
+
+- nvbits = mctGet_NVbits(NV_L2BKScrub);
+- OF_ScrubCTL |= (u32) nvbits << 8;
++ nvbits = mctGet_NVbits(NV_L2BKScrub);
++ OF_ScrubCTL |= (u32) nvbits << 8;
++ }
+
+ nvbits = mctGet_NVbits(NV_DramBKScrub);
+ OF_ScrubCTL |= nvbits;
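For reference, the ScrubCTL word assembled above packs three independent scrub rate fields into one register write; a hedged sketch of the packing implied by the shifts in this hunk (on Fam15h the Dcache and L2 fields stay zero, matching the is_fam15h() guard):

#include <stdint.h>

/* Illustrative only: DRAM scrub rate in bits [4:0], L2 scrub rate in [12:8],
 * Dcache scrub rate in [20:16], as used by the OF_ScrubCTL assembly above.
 */
static uint32_t build_scrub_ctl(uint8_t dram_rate, uint8_t l2_rate,
				uint8_t dcache_rate, int fam15h)
{
	uint32_t scrub_ctl = dram_rate & 0x1f;

	if (!fam15h) {
		scrub_ctl |= (uint32_t)(l2_rate & 0x1f) << 8;
		scrub_ctl |= (uint32_t)(dcache_rate & 0x1f) << 16;
	}
	return scrub_ctl;
}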
+@@ -131,7 +134,7 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA)
+ pDCTstat->ErrStatus |= (1 << SB_DramECCDis);
+ }
+ AllECC = 0;
+- LDramECC =0;
++ LDramECC = 0;
+ }
+ } else {
+ AllECC = 0;
+@@ -140,7 +143,7 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA)
+ if (OB_NBECC) {
+ mct_EnableDatIntlv_D(pMCTstat, pDCTstat);
+ dev = pDCTstat->dev_nbmisc;
+- reg =0x44; /* MCA NB Configuration */
++ reg = 0x44; /* MCA NB Configuration */
+ val = Get_NB32(dev, reg);
+ val |= 1 << 22; /* EccEn */
+ Set_NB32(dev, reg, val);
+@@ -177,6 +180,10 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA)
+ /*WE/RE is checked because memory config may have been */
+ if((val & 3)==3) { /* Node has dram populated */
+ if (isDramECCEn_D(pDCTstat)) { /* if ECC is enabled on this dram */
++ if (is_fam15h()) {
++ /* Erratum 505 */
++ fam15h_switch_dct(pDCTstat->dev_map, 0);
++ }
+ dev = pDCTstat->dev_nbmisc;
+ val = curBase << 8;
+ if(OB_ECCRedir) {
+@@ -187,16 +194,18 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA)
+ Set_NB32(dev, 0x60, val); /* Dram Scrub Addr High */
+ Set_NB32(dev, 0x58, OF_ScrubCTL); /*Scrub Control */
+
+- /* Divisor should not be set deeper than
+- * divide by 16 when Dcache scrubber or
+- * L2 scrubber is enabled.
+- */
+- if ((OF_ScrubCTL & (0x1F << 16)) || (OF_ScrubCTL & (0x1F << 8))) {
+- val = Get_NB32(dev, 0x84);
+- if ((val & 0xE0000000) > 0x80000000) { /* Get F3x84h[31:29]ClkDivisor for C1 */
+- val &= 0x1FFFFFFF; /* If ClkDivisor is deeper than divide-by-16 */
+- val |= 0x80000000; /* set it to divide-by-16 */
+- Set_NB32(dev, 0x84, val);
++ if (!is_fam15h()) {
++ /* Divisor should not be set deeper than
++ * divide by 16 when Dcache scrubber or
++ * L2 scrubber is enabled.
++ */
++ if ((OF_ScrubCTL & (0x1F << 16)) || (OF_ScrubCTL & (0x1F << 8))) {
++ val = Get_NB32(dev, 0x84);
++ if ((val & 0xE0000000) > 0x80000000) { /* Get F3x84h[31:29]ClkDivisor for C1 */
++ val &= 0x1FFFFFFF; /* If ClkDivisor is deeper than divide-by-16 */
++ val |= 0x80000000; /* set it to divide-by-16 */
++ Set_NB32(dev, 0x84, val);
++ }
+ }
+ }
+ } /* this node has ECC enabled dram */
+@@ -267,8 +276,8 @@ static u8 isDramECCEn_D(struct DCTStatStruc *pDCTstat)
+ }
+ for(i=0; i<ch_end; i++) {
+ if(pDCTstat->DIMMValidDCT[i] > 0){
+- reg = 0x90 + i * 0x100; /* Dram Config Low */
+- val = Get_NB32(dev, reg);
++ reg = 0x90; /* Dram Config Low */
++ val = Get_NB32_DCT(dev, i, reg);
+ if(val & (1<<DimmEcEn)) {
+ /* set local flag 'dram ecc capable' */
+ isDimmECCEn = 1;
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c
+index 0112732..a6b9dcb 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -25,8 +26,8 @@ void mct_DramInit_Hw_D(struct MCTStatStruc *pMCTstat,
+ u32 dev = pDCTstat->dev_dct;
+
+ /*flag for selecting HW/SW DRAM Init HW DRAM Init */
+- reg = 0x90 + 0x100 * dct; /*DRAM Configuration Low */
+- val = Get_NB32(dev, reg);
++ reg = 0x90; /*DRAM Configuration Low */
++ val = Get_NB32_DCT(dev, dct, reg);
+ val |= (1<<InitDram);
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, dct, reg, val);
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+index 60bc01d..5e81808 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+@@ -18,10 +18,12 @@
+ * Foundation, Inc.
+ */
+
+-static void SetTargetFreq(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat);
+-static void AgesaHwWlPhase1(sMCTStruct *pMCTData,
+- sDCTStruct *pDCTData, u8 dimm, u8 pass);
++static void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
++static void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
++static void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
+ static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+ static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+ static void PrepareC_MCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+@@ -56,7 +58,7 @@ static void SetEccWrDQS_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pD
+ Addl_Index = 0x32;
+ Addl_Index += DimmNum * 3;
+
+- val = Get_NB32_index_wait(pDCTstat->dev_dct, Channel * 0x100 + 0x98, Addl_Index);
++ val = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, Channel, 0x98, Addl_Index);
+ if (OddByte)
+ val >>= 16;
+ /* Save WrDqs to stack for later usage */
+@@ -74,13 +76,13 @@ static void EnableAutoRefresh_D(struct MCTStatStruc *pMCTstat, struct DCTStatStr
+ {
+ u32 val;
+
+- val = Get_NB32(pDCTstat->dev_dct, 0x8C);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x8C);
+ val &= ~(1 << DisAutoRefresh);
+- Set_NB32(pDCTstat->dev_dct, 0x8C, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x8C, val);
+
+- val = Get_NB32(pDCTstat->dev_dct, 0x8C + 0x100);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x8C);
+ val &= ~(1 << DisAutoRefresh);
+- Set_NB32(pDCTstat->dev_dct, 0x8C + 0x100, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x8C, val);
+ }
+
+ static void DisableAutoRefresh_D(struct MCTStatStruc *pMCTstat,
+@@ -88,13 +90,13 @@ static void DisableAutoRefresh_D(struct MCTStatStruc *pMCTstat,
+ {
+ u32 val;
+
+- val = Get_NB32(pDCTstat->dev_dct, 0x8C);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x8C);
+ val |= 1 << DisAutoRefresh;
+- Set_NB32(pDCTstat->dev_dct, 0x8C, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x8C, val);
+
+- val = Get_NB32(pDCTstat->dev_dct, 0x8C + 0x100);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x8C);
+ val |= 1 << DisAutoRefresh;
+- Set_NB32(pDCTstat->dev_dct, 0x8C + 0x100, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x8C, val);
+ }
+
+
+@@ -118,8 +120,11 @@ static void PhyWLPass1(struct MCTStatStruc *pMCTstat,
+ DIMMValid = pDCTstat->DIMMValid;
+ PrepareC_DCT(pMCTstat, pDCTstat, dct);
+ for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) {
+- if (DIMMValid & (1 << (dimm << 1)))
+- AgesaHwWlPhase1(pDCTstat->C_MCTPtr, DCTPtr, dimm, FirstPass);
++ if (DIMMValid & (1 << (dimm << 1))) {
++ AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass);
++ AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass);
++ AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass);
++ }
+ }
+ }
+ }
+@@ -146,27 +151,40 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
+ pDCTstat->Speed = pDCTstat->DIMMAutoSpeed = pDCTstat->TargetFreq;
+ pDCTstat->CASL = pDCTstat->DIMMCASL = pDCTstat->TargetCASL;
+ SPD2ndTiming(pMCTstat, pDCTstat, dct);
+- ProgDramMRSReg_D(pMCTstat, pDCTstat, dct);
+- PlatformSpec_D(pMCTstat, pDCTstat, dct);
+- fenceDynTraining_D(pMCTstat, pDCTstat, dct);
++ if (!is_fam15h()) {
++ ProgDramMRSReg_D(pMCTstat, pDCTstat, dct);
++ PlatformSpec_D(pMCTstat, pDCTstat, dct);
++ fenceDynTraining_D(pMCTstat, pDCTstat, dct);
++ }
+ Restore_OnDimmMirror(pMCTstat, pDCTstat);
+ StartupDCT_D(pMCTstat, pDCTstat, dct);
+ Clear_OnDimmMirror(pMCTstat, pDCTstat);
+ SetDllSpeedUp_D(pMCTstat, pDCTstat, dct);
+ DisableAutoRefresh_D(pMCTstat, pDCTstat);
+ for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) {
+- if (DIMMValid & (1 << (dimm << 1)))
+- AgesaHwWlPhase1(pDCTstat->C_MCTPtr, pDCTstat->C_DCTPtr[dct], dimm, SecondPass);
++ if (DIMMValid & (1 << (dimm << 1))) {
++ AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass);
++ AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass);
++ AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass);
++ }
+ }
+ }
+ }
+
++static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq)
++{
++ uint16_t fam15h_next_highest_freq_tab[] = {0, 0, 0, 0, 0x6, 0, 0xa, 0, 0, 0, 0xe, 0, 0, 0, 0x12, 0, 0, 0, 0x16, 0, 0, 0, 0x16};
++ return fam15h_next_highest_freq_tab[memclk_freq];
++}
++
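The helper above only encodes single steps; the second write-levelization pass keeps applying it until the target MEMCLK configuration code is reached (see the while loop added to WriteLevelization_HW below). A self-contained illustration of that stepping, with the table values copied from the function above:

#include <stdio.h>
#include <stdint.h>

/* Mirrors fam15h_next_highest_memclk_freq() above: supported configuration
 * codes step 0x4 -> 0x6 -> 0xa -> 0xe -> 0x12 -> 0x16, one notch at a time.
 */
static uint16_t next_memclk_code(uint16_t code)
{
	static const uint16_t next[] = {
		0, 0, 0, 0, 0x6, 0, 0xa, 0, 0, 0, 0xe, 0,
		0, 0, 0x12, 0, 0, 0, 0x16, 0, 0, 0, 0x16,
	};
	return next[code];
}

int main(void)
{
	uint16_t code = 0x4;		/* start at the lowest code in the table */
	const uint16_t target = 0x16;	/* highest supported code */

	while (code != target && code < 0x16) {
		code = next_memclk_code(code);
		/* A real pass retrains write levelling here (PhyWLPass2 above) */
		printf("MEMCLK configuration code now 0x%02x\n", code);
	}
	return 0;
}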
+ /* Write Levelization Training
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.1
+ */
+ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat)
++ struct DCTStatStruc *pDCTstat, uint8_t Pass)
+ {
++ uint16_t final_target_freq;
++
+ pDCTstat->C_MCTPtr = &(pDCTstat->s_C_MCTPtr);
+ pDCTstat->C_DCTPtr[0] = &(pDCTstat->s_C_DCTPtr[0]);
+ pDCTstat->C_DCTPtr[1] = &(pDCTstat->s_C_DCTPtr[1]);
+@@ -182,16 +200,39 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
+ pDCTstat->DIMMValidDCT[1] = pDCTstat->DIMMValidDCT[0];
+ }
+
+- PhyWLPass1(pMCTstat, pDCTstat, 0);
+- PhyWLPass1(pMCTstat, pDCTstat, 1);
++ if (Pass == FirstPass) {
++ PhyWLPass1(pMCTstat, pDCTstat, 0);
++ PhyWLPass1(pMCTstat, pDCTstat, 1);
++ }
++
++ if (Pass == SecondPass) {
++ if (pDCTstat->TargetFreq > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) {
++ /* 8.Prepare the memory subsystem for the target MEMCLK frequency.
++ * NOTE: BIOS must program both DCTs to the same frequency.
++ * NOTE: Fam15h steps the frequency, Fam10h slams the frequency.
++ */
++ final_target_freq = pDCTstat->TargetFreq;
++
++ while (pDCTstat->Speed != final_target_freq) {
++ if (is_fam15h())
++ pDCTstat->TargetFreq = fam15h_next_highest_memclk_freq(pDCTstat->Speed);
++ else
++ pDCTstat->TargetFreq = final_target_freq;
++ SetTargetFreq(pMCTstat, pDCTstat);
++ PhyWLPass2(pMCTstat, pDCTstat, 0);
++ PhyWLPass2(pMCTstat, pDCTstat, 1);
++ }
++
++ pDCTstat->TargetFreq = final_target_freq;
+
+- if (pDCTstat->TargetFreq > 4) {
+- /* 8.Prepare the memory subsystem for the target MEMCLK frequency.
+- * Note: BIOS must program both DCTs to the same frequency.
+- */
+- SetTargetFreq(pMCTstat, pDCTstat);
+- PhyWLPass2(pMCTstat, pDCTstat, 0);
+- PhyWLPass2(pMCTstat, pDCTstat, 1);
++ uint8_t dct;
++ for (dct = 0; dct < 2; dct++) {
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
++ memcpy(pDCTData->WLGrossDelayFinalPass, pDCTData->WLGrossDelayPrevPass, sizeof(pDCTData->WLGrossDelayPrevPass));
++ memcpy(pDCTData->WLFineDelayFinalPass, pDCTData->WLFineDelayPrevPass, sizeof(pDCTData->WLFineDelayPrevPass));
++ pDCTData->WLCriticalGrossDelayFinalPass = pDCTData->WLCriticalGrossDelayPrevPass;
++ }
++ }
+ }
+
+ SetEccWrDQS_D(pMCTstat, pDCTstat);
+@@ -200,7 +241,7 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
+ }
+
+ void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstatA)
++ struct DCTStatStruc *pDCTstatA, uint8_t Pass)
+ {
+ u8 Node;
+
+@@ -211,7 +252,7 @@ void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
+ if (pDCTstat->NodePresent) {
+ mctSMBhub_Init(Node);
+ Clear_OnDimmMirror(pMCTstat, pDCTstat);
+- WriteLevelization_HW(pMCTstat, pDCTstat);
++ WriteLevelization_HW(pMCTstat, pDCTstat, Pass);
+ Restore_OnDimmMirror(pMCTstat, pDCTstat);
+ }
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
+index cda9c6b..5ef4a2c 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
+@@ -34,7 +34,7 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2)
+
+ if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
+ misc2 |= 1 << OdtSwizzle;
+- val = Get_NB32(pDCTstat->dev_dct, dct * 0x100 + 0x78);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x78);
+
+ val &= 7;
+ val = ((~val) & 0xff) + 1;
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c
+index bd8b7fb..5ea7fa6 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -23,7 +24,6 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat,
+ u8 Dimms, DimmNum, MaxDimm, Speed;
+ u32 val;
+ u32 dct = 0;
+- u32 reg_off = 0;
+
+ DimmNum = (MrsChipSel >> 20) & 0xFE;
+
+@@ -41,7 +41,6 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat,
+ dct = 1;
+ DimmNum ++;
+ }
+- reg_off = 0x100 * dct;
+ Dimms = pDCTstat->MAdimms[dct];
+
+ val = 0;
+@@ -95,21 +94,21 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat,
+ static void mct_SendCtrlWrd(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u32 val)
+ {
+- u32 reg_off = 0;
++ uint8_t dct = 0;
+ u32 dev = pDCTstat->dev_dct;
+
+ if (pDCTstat->CSPresent_DCT[0] > 0) {
+- reg_off = 0;
++ dct = 0;
+ } else if (pDCTstat->CSPresent_DCT[1] > 0 ){
+- reg_off = 0x100;
++ dct = 1;
+ }
+
+- val |= Get_NB32(dev, reg_off + 0x7C) & ~0xFFFFFF;
++ val |= Get_NB32_DCT(dev, dct, 0x7C) & ~0xFFFFFF;
+ val |= 1 << SendControlWord;
+- Set_NB32(dev, reg_off + 0x7C, val);
++ Set_NB32_DCT(dev, dct, 0x7C, val);
+
+ do {
+- val = Get_NB32(dev, reg_off + 0x7C);
++ val = Get_NB32_DCT(dev, dct, 0x7C);
+ } while (val & (1 << SendControlWord));
+ }
+
+@@ -119,7 +118,6 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat,
+ u8 MrsChipSel;
+ u32 dev = pDCTstat->dev_dct;
+ u32 val, cw;
+- u32 reg_off = 0x100 * dct;
+
+ mct_Wait(1600);
+
+@@ -127,7 +125,7 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat,
+
+ for (MrsChipSel = 0; MrsChipSel < 8; MrsChipSel ++, MrsChipSel ++) {
+ if (pDCTstat->CSPresent & (1 << MrsChipSel)) {
+- val = Get_NB32(dev, reg_off + 0xA8);
++ val = Get_NB32_DCT(dev, dct, 0xa8);
+ val &= ~(0xF << 8);
+
+ switch (MrsChipSel) {
+@@ -144,7 +142,7 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat,
+ case 7:
+ val |= (3 << 6) << 8;
+ }
+- Set_NB32(dev, reg_off + 0xA8 , val);
++ Set_NB32_DCT(dev, dct, 0xa8, val);
+
+ for (cw=0; cw <=15; cw ++) {
+ mct_Wait(1600);
+@@ -171,10 +169,10 @@ void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat,
+ for (MrsChipSel=0; MrsChipSel < 8; MrsChipSel++, MrsChipSel++) {
+ if (pDCTstat->CSPresent & (1 << MrsChipSel)) {
+ /* 2. Program F2x[1, 0]A8[CtrlWordCS]=bit mask for target chip selects. */
+- val = Get_NB32(dev, 0xA8); /* TODO: dct * 0x100 + 0xA8 */
++ val = Get_NB32_DCT(dev, 0, 0xA8); /* TODO: dct 0 / 1 select */
+ val &= ~(0xFF << 8);
+ val |= (0x3 << (MrsChipSel & 0xFE)) << 8;
+- Set_NB32(dev, 0xA8, val); /* TODO: dct * 0x100 + 0xA8 */
++ Set_NB32_DCT(dev, 0, 0xA8, val); /* TODO: dct 0 / 1 select */
+
+ /* Resend control word 10 */
+ mct_Wait(1600);
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+index b21b96a..51cbf16 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+@@ -18,17 +18,182 @@
+ * Foundation, Inc.
+ */
+
++static uint8_t fam15_dimm_dic(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type)
++{
++ uint8_t dic;
++
++ /* Calculate DIC based on recommendations in MR1_dct[1:0] */
++ if (pDCTstat->Status & (1 << SB_LoadReduced)) {
++ /* TODO
++ * LRDIMM unimplemented
++ */
++ dic = 0x0;
++ } else {
++ dic = 0x1;
++ }
++
++ return dic;
++}
++
++static uint8_t fam15_rttwr(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type)
++{
++ uint8_t term = 0;
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
++ uint8_t number_of_dimms = pDCTData->MaxDimmsInstalled;
++ uint8_t frequency_index;
++ uint8_t rank_count = pDCTData->DimmRanks[dimm];
++
++ if (is_fam15h())
++ frequency_index = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++ else
++ frequency_index = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x7;
++
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ if (is_fam15h()) {
++ if (pDCTstat->Status & (1 << SB_Registered)) {
++ /* TODO
++ * RDIMM unimplemented
++ */
++ } else {
++ if (package_type == PT_GR) {
++ /* Socket G34: Fam15h BKDG v3.14 Table 56 */
++ if (MaxDimmsInstallable == 1) {
++ term = 0x0;
++ } else if (MaxDimmsInstallable == 2) {
++ if ((number_of_dimms == 2) && (frequency_index == 0x12)) {
++ term = 0x1;
++ } else if (number_of_dimms == 1) {
++ term = 0x0;
++ } else {
++ term = 0x2;
++ }
++ } else if (MaxDimmsInstallable == 3) {
++ if (number_of_dimms == 1) {
++ if (frequency_index <= 0xa) {
++ term = 0x2;
++ } else {
++ if (rank_count < 3) {
++ term = 0x1;
++ } else {
++ term = 0x2;
++ }
++ }
++ } else if (number_of_dimms == 2) {
++ term = 0x2;
++ }
++ }
++ } else {
++ /* TODO
++ * Other sockets unimplemented
++ */
++ }
++ }
++ }
++
++ return term;
++}
++
++static uint8_t fam15_rttnom(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type)
++{
++ uint8_t term = 0;
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
++ uint8_t number_of_dimms = pDCTData->MaxDimmsInstalled;
++ uint8_t frequency_index;
++
++ if (is_fam15h())
++ frequency_index = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f;
++ else
++ frequency_index = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x7;
++
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ if (is_fam15h()) {
++ if (pDCTstat->Status & (1 << SB_LoadReduced)) {
++ /* TODO
++ * LRDIMM unimplemented
++ */
++ } else if (pDCTstat->Status & (1 << SB_Registered)) {
++ /* TODO
++ * RDIMM unimplemented
++ */
++ } else {
++ if (package_type == PT_GR) {
++ /* Socket G34: Fam15h BKDG v3.14 Table 56 */
++ if (MaxDimmsInstallable == 1) {
++ if ((frequency_index == 0x4) || (frequency_index == 0x6))
++ term = 0x2;
++ else if ((frequency_index == 0xa) || (frequency_index == 0xe))
++ term = 0x1;
++ else
++ term = 0x3;
++ }
++ if (MaxDimmsInstallable == 2) {
++ if (number_of_dimms == 1) {
++ if (frequency_index <= 0x6) {
++ term = 0x2;
++ } else if (frequency_index <= 0xe) {
++ term = 0x1;
++ } else {
++ term = 0x3;
++ }
++ } else {
++ if (frequency_index <= 0xa) {
++ term = 0x3;
++ } else if (frequency_index <= 0xe) {
++ term = 0x5;
++ } else {
++ term = 0x4;
++ }
++ }
++ } else if (MaxDimmsInstallable == 3) {
++ if (number_of_dimms == 1) {
++ term = 0x0;
++ } else if (number_of_dimms == 2) {
++ if (frequency_index <= 0xa) {
++ if (rank == 1) {
++ term = 0x0;
++ } else {
++ term = 0x3;
++ }
++ } else if (frequency_index <= 0xe) {
++ if (rank == 1) {
++ term = 0x0;
++ } else {
++ term = 0x5;
++ }
++ }
++ }
++ }
++ } else {
++ /* TODO
++ * Other sockets unimplemented
++ */
++ }
++ }
++ }
++
++ return term;
++}
++
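The codes returned by fam15_rttwr() and fam15_rttnom() above are the raw DDR3 mode register encodings that later get packed into MR2 and MR1. As a reading aid, a hedged mapping from those codes to nominal termination values (these are the standard JEDEC DDR3 encodings with RZQ = 240 ohm; the BKDG tables cited above are expressed in ohms):

/* RttNom code {A9,A6,A2} -> ohms; 0 means ODT disabled (JEDEC DDR3, RZQ = 240) */
static const unsigned int rttnom_ohms[8] = { 0, 60, 120, 40, 20, 30, 0, 0 };

/* RttWr code {A10,A9} -> ohms; 0 means dynamic ODT off */
static const unsigned int rttwr_ohms[4] = { 0, 60, 120, 0 };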
+ static void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
+
+ static void mct_DCTAccessDone(struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+ u32 val;
+
+ do {
+- val = Get_NB32(dev, reg_off + 0x98);
++ val = Get_NB32_DCT(dev, dct, 0x98);
+ } while (!(val & (1 << DctAccessDone)));
+ }
+
+@@ -54,9 +219,15 @@ static u32 swapAddrBits(struct DCTStatStruc *pDCTstat, u32 MR_register_setting,
+ if (MR_register_setting & (1 << 6)) ret |= 1 << 5;
+ if (MR_register_setting & (1 << 7)) ret |= 1 << 8;
+ if (MR_register_setting & (1 << 8)) ret |= 1 << 7;
+- if (MR_register_setting & (1 << 16)) ret |= 1 << 17;
+- if (MR_register_setting & (1 << 17)) ret |= 1 << 16;
+- MR_register_setting &= ~0x301f8;
++ if (is_fam15h()) {
++ if (MR_register_setting & (1 << 18)) ret |= 1 << 19;
++ if (MR_register_setting & (1 << 19)) ret |= 1 << 18;
++ MR_register_setting &= ~0x000c01f8;
++ } else {
++ if (MR_register_setting & (1 << 16)) ret |= 1 << 17;
++ if (MR_register_setting & (1 << 17)) ret |= 1 << 16;
++ MR_register_setting &= ~0x000301f8;
++ }
+ MR_register_setting |= ret;
+ }
+ }
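swapAddrBits() above handles DDR3 address mirroring: on a mirrored rank the address pairs A3/A4, A5/A6 and A7/A8 and the low bank address pair are exchanged, so the MRS value has to be pre-swapped before it is sent. A minimal sketch of the Fam10h-style swap (bank pair at bits [17:16] and mask 0x301f8, as in the else branch above; the Fam15h branch does the same with the bank pair at bits [19:18]):

#include <stdint.h>

/* Illustrative: swap the mirrored address/bank bit pairs of an MRS word. */
static uint32_t mirror_mrs_word(uint32_t mrs)
{
	static const uint8_t pairs[][2] = {
		{3, 4}, {5, 6}, {7, 8}, {16, 17},
	};
	uint32_t swapped = 0;
	unsigned int i;

	for (i = 0; i < sizeof(pairs) / sizeof(pairs[0]); i++) {
		if (mrs & (1u << pairs[i][0]))
			swapped |= 1u << pairs[i][1];
		if (mrs & (1u << pairs[i][1]))
			swapped |= 1u << pairs[i][0];
	}

	mrs &= ~0x000301f8;	/* clear every swapped position */
	return mrs | swapped;
}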
+@@ -65,47 +236,76 @@ static u32 swapAddrBits(struct DCTStatStruc *pDCTstat, u32 MR_register_setting,
+
+ static void mct_SendMrsCmd(struct DCTStatStruc *pDCTstat, u8 dct, u32 EMRS)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+ u32 val;
+
+- val = Get_NB32(dev, reg_off + 0x7C);
+- val &= ~0xFFFFFF;
++ val = Get_NB32_DCT(dev, dct, 0x7c);
++ val &= ~0x00ffffff;
+ val |= EMRS;
+ val |= 1 << SendMrsCmd;
+- Set_NB32(dev, reg_off + 0x7C, val);
++ Set_NB32_DCT(dev, dct, 0x7c, val);
+
+ do {
+- val = Get_NB32(dev, reg_off + 0x7C);
++ val = Get_NB32_DCT(dev, dct, 0x7c);
+ } while (val & (1 << SendMrsCmd));
+ }
+
+ static u32 mct_MR2(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+ u32 dword, ret;
+
+- ret = 0x20000;
+- ret |= MrsChipSel;
++ if (is_fam15h()) {
++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
++
++ /* The formula for chip select number is: CS = dimm*2+rank */
++ uint8_t dimm = MrsChipSel / 2;
++ uint8_t rank = MrsChipSel % 2;
+
+- /* program MrsAddress[5:3]=CAS write latency (CWL):
+- * based on F2x[1,0]84[Tcwl] */
+- dword = Get_NB32(dev, reg_off + 0x84);
+- dword = mct_AdjustSPDTimings(pMCTstat, pDCTstat, dword);
++ /* FIXME: These parameters should be configurable
++ * For now, err on the side of caution and enable automatic 2x refresh
++ * when the DDR temperature rises above the internal limits
++ */
++ uint8_t force_2x_self_refresh = 0; /* ASR */
++ uint8_t auto_2x_self_refresh = 1; /* SRT */
+
+- ret |= ((dword >> 20) & 7) << 3;
++ ret = 0x80000;
++ ret |= (MrsChipSel << 21);
+
+- /* program MrsAddress[6]=auto self refresh method (ASR):
+- based on F2x[1,0]84[ASR]
+- program MrsAddress[7]=self refresh temperature range (SRT):
+- based on F2x[1,0]84[ASR and SRT] */
+- ret |= ((dword >> 18) & 3) << 6;
++ /* Set self refresh parameters */
++ ret |= (force_2x_self_refresh << 6);
++ ret |= (auto_2x_self_refresh << 7);
+
+- /* program MrsAddress[10:9]=dynamic termination during writes (RTT_WR)
+- based on F2x[1,0]84[DramTermDyn] */
+- ret |= ((dword >> 10) & 3) << 9;
++ /* Obtain Tcwl, adjust, and set CWL with the adjusted value */
++ dword = Get_NB32_DCT(dev, dct, 0x20c) & 0x1f;
++ ret |= ((dword - 5) << 3);
++
++ /* Obtain and set RttWr */
++ ret |= (fam15_rttwr(pDCTstat, dct, dimm, rank, package_type) << 9);
++ } else {
++ ret = 0x20000;
++ ret |= (MrsChipSel << 20);
++
++ /* program MrsAddress[5:3]=CAS write latency (CWL):
++ * based on F2x[1,0]84[Tcwl] */
++ dword = Get_NB32_DCT(dev, dct, 0x84);
++ dword = mct_AdjustSPDTimings(pMCTstat, pDCTstat, dword);
++
++ ret |= ((dword >> 20) & 7) << 3;
++
++ /* program MrsAddress[6]=auto self refresh method (ASR):
++ * based on F2x[1,0]84[ASR]
++ * program MrsAddress[7]=self refresh temperature range (SRT):
++ * based on F2x[1,0]84[ASR and SRT]
++ */
++ ret |= ((dword >> 18) & 3) << 6;
++
++ /* program MrsAddress[10:9]=dynamic termination during writes (RTT_WR)
++ * based on F2x[1,0]84[DramTermDyn]
++ */
++ ret |= ((dword >> 10) & 3) << 9;
++ }
+
+ return ret;
+ }
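The Fam15h branch above builds the MR2 word directly instead of copying fields out of F2x[1,0]84. For reference, a hedged sketch assembling the same word from its parts, with bit positions taken from the shifts in this hunk (the chip select number is CS = dimm * 2 + rank, as noted in the code):

#include <stdint.h>

/* Illustrative assembly of the Fam15h MR2 command word written via F2x7C.
 * cwl_clks is the CAS write latency in clocks; the code above reads it from
 * D18F2x20C[Tcwl] and subtracts 5 to obtain the MR2 encoding.
 */
static uint32_t fam15_mr2_word(uint8_t chip_select, uint8_t cwl_clks,
			       uint8_t asr, uint8_t srt, uint8_t rtt_wr)
{
	uint32_t mr2 = 0x80000;			/* MR2 select, as above */

	mr2 |= (uint32_t)chip_select << 21;	/* target chip select */
	mr2 |= (uint32_t)(asr & 0x1) << 6;	/* forced 2x self refresh */
	mr2 |= (uint32_t)(srt & 0x1) << 7;	/* self refresh temperature range */
	mr2 |= (uint32_t)((cwl_clks - 5) & 0x7) << 3;	/* CAS write latency */
	mr2 |= (uint32_t)(rtt_wr & 0x3) << 9;	/* dynamic termination (RttWr) */
	return mr2;
}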
+@@ -113,20 +313,28 @@ static u32 mct_MR2(struct MCTStatStruc *pMCTstat,
+ static u32 mct_MR3(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+ u32 dword, ret;
+
+- ret = 0x30000;
+- ret |= MrsChipSel;
++ if (is_fam15h()) {
++ ret = 0xc0000;
++ ret |= (MrsChipSel << 21);
+
+- /* program MrsAddress[1:0]=multi purpose register address location
+- (MPR Location):based on F2x[1,0]84[MprLoc]
+- program MrsAddress[2]=multi purpose register
+- (MPR):based on F2x[1,0]84[MprEn]
+- */
+- dword = Get_NB32(dev, reg_off + 0x84);
+- ret |= (dword >> 24) & 7;
++ /* Program MPR and MPRLoc to 0 */
++ // ret |= 0x0; /* MPR */
++ // ret |= (0x0 << 2); /* MPRLoc */
++ } else {
++ ret = 0x30000;
++ ret |= (MrsChipSel << 20);
++
++ /* program MrsAddress[1:0]=multi purpose register address location
++ * (MPR Location):based on F2x[1,0]84[MprLoc]
++ * program MrsAddress[2]=multi purpose register
++ * (MPR):based on F2x[1,0]84[MprEn]
++ */
++ dword = Get_NB32_DCT(dev, dct, 0x84);
++ ret |= (dword >> 24) & 7;
++ }
+
+ return ret;
+ }
+@@ -134,48 +342,93 @@ static u32 mct_MR3(struct MCTStatStruc *pMCTstat,
+ static u32 mct_MR1(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+ u32 dword, ret;
+
+- ret = 0x10000;
+- ret |= MrsChipSel;
+-
+- /* program MrsAddress[5,1]=output driver impedance control (DIC):
+- * based on F2x[1,0]84[DrvImpCtrl] */
+- dword = Get_NB32(dev, reg_off + 0x84);
+- if (dword & (1 << 3))
+- ret |= 1 << 5;
+- if (dword & (1 << 2))
+- ret |= 1 << 1;
+-
+- /* program MrsAddress[9,6,2]=nominal termination resistance of ODT (RTT):
+- based on F2x[1,0]84[DramTerm] */
+- if (!(pDCTstat->Status & (1 << SB_Registered))) {
+- if (dword & (1 << 9))
+- ret |= 1 << 9;
+- if (dword & (1 << 8))
+- ret |= 1 << 6;
+- if (dword & (1 << 7))
+- ret |= 1 << 2;
++ if (is_fam15h()) {
++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
++
++ /* Set defaults */
++ uint8_t qoff = 0; /* Enable output buffers */
++ uint8_t wrlvl = 0; /* Disable write levelling */
++ uint8_t tqds = 0;
++ uint8_t rttnom = 0;
++ uint8_t dic = 0;
++ uint8_t additive_latency = 0;
++ uint8_t dll_enable = 0;
++
++ ret = 0x40000;
++ ret |= (MrsChipSel << 21);
++
++ /* The formula for chip select number is: CS = dimm*2+rank */
++ uint8_t dimm = MrsChipSel / 2;
++ uint8_t rank = MrsChipSel % 2;
++
++ /* Determine if TQDS should be set */
++ if ((pDCTstat->Dimmx8Present & (1 << dimm))
++ && (((dimm & 0x1)?(pDCTstat->Dimmx4Present&0x55):(pDCTstat->Dimmx4Present&0xaa)) != 0x0)
++ && (pDCTstat->Status & (1 << SB_LoadReduced)))
++ tqds = 1;
++
++ /* Obtain RttNom */
++ rttnom = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type);
++
++ /* Obtain DIC */
++ dic = fam15_dimm_dic(pDCTstat, dct, dimm, rank, package_type);
++
++ /* Load data into MRS word */
++ ret |= (qoff & 0x1) << 12;
++ ret |= (tqds & 0x1) << 11;
++ ret |= ((rttnom & 0x4) >> 2) << 9;
++ ret |= ((rttnom & 0x2) >> 1) << 6;
++ ret |= ((rttnom & 0x1) >> 0) << 2;
++ ret |= (wrlvl & 0x1) << 7;
++ ret |= ((dic & 0x2) >> 1) << 5;
++ ret |= ((dic & 0x1) >> 0) << 1;
++ ret |= (additive_latency & 0x3) << 3;
++ ret |= (dll_enable & 0x1);
+ } else {
+- ret |= mct_MR1Odt_RDimm(pMCTstat, pDCTstat, dct, MrsChipSel);
+- }
++ ret = 0x10000;
++ ret |= (MrsChipSel << 20);
++
++ /* program MrsAddress[5,1]=output driver impedance control (DIC):
++ * based on F2x[1,0]84[DrvImpCtrl]
++ */
++ dword = Get_NB32_DCT(dev, dct, 0x84);
++ if (dword & (1 << 3))
++ ret |= 1 << 5;
++ if (dword & (1 << 2))
++ ret |= 1 << 1;
++
++ /* program MrsAddress[9,6,2]=nominal termination resistance of ODT (RTT):
++ * based on F2x[1,0]84[DramTerm]
++ */
++ if (!(pDCTstat->Status & (1 << SB_Registered))) {
++ if (dword & (1 << 9))
++ ret |= 1 << 9;
++ if (dword & (1 << 8))
++ ret |= 1 << 6;
++ if (dword & (1 << 7))
++ ret |= 1 << 2;
++ } else {
++ ret |= mct_MR1Odt_RDimm(pMCTstat, pDCTstat, dct, MrsChipSel);
++ }
+
+- /* program MrsAddress[11]=TDQS: based on F2x[1,0]94[RDqsEn] */
+- if (Get_NB32(dev, reg_off + 0x94) & (1 << RDqsEn)) {
+- u8 bit;
+- /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */
+- bit = (ret >> 21) << 1;
+- if ((dct & 1) != 0)
+- bit ++;
+- if (pDCTstat->Dimmx8Present & (1 << bit))
+- ret |= 1 << 11;
+- }
++ /* program MrsAddress[11]=TDQS: based on F2x[1,0]94[RDqsEn] */
++ if (Get_NB32_DCT(dev, dct, 0x94) & (1 << RDqsEn)) {
++ u8 bit;
++ /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */
++ bit = (ret >> 21) << 1;
++ if ((dct & 1) != 0)
++ bit ++;
++ if (pDCTstat->Dimmx8Present & (1 << bit))
++ ret |= 1 << 11;
++ }
+
+- /* program MrsAddress[12]=QOFF: based on F2x[1,0]84[Qoff] */
+- if (dword & (1 << 13))
+- ret |= 1 << 12;
++ /* program MrsAddress[12]=QOFF: based on F2x[1,0]84[Qoff] */
++ if (dword & (1 << 13))
++ ret |= 1 << 12;
++ }
+
+ return ret;
+ }
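MR1 spreads several multi-bit fields across non-contiguous address bits, which makes the shifts above tedious to audit. A hedged decode of the Fam15h word assembled in mct_MR1() above, using the same field names and the bit positions from this hunk:

#include <stdint.h>

struct mr1_fields {
	uint8_t qoff;			/* bit 12: output buffer disable */
	uint8_t tqds;			/* bit 11: termination data strobe */
	uint8_t rttnom;			/* bits 9, 6, 2: nominal ODT code */
	uint8_t wrlvl;			/* bit 7: write levelling enable */
	uint8_t dic;			/* bits 5, 1: output driver impedance */
	uint8_t additive_latency;	/* bits 4:3 */
	uint8_t dll_enable;		/* bit 0 */
};

/* Illustrative reverse of the packing performed above. */
static struct mr1_fields decode_fam15_mr1(uint32_t mr1)
{
	struct mr1_fields f;

	f.qoff = (mr1 >> 12) & 0x1;
	f.tqds = (mr1 >> 11) & 0x1;
	f.rttnom = (((mr1 >> 9) & 0x1) << 2) |
		   (((mr1 >> 6) & 0x1) << 1) |
		   ((mr1 >> 2) & 0x1);
	f.wrlvl = (mr1 >> 7) & 0x1;
	f.dic = (((mr1 >> 5) & 0x1) << 1) | ((mr1 >> 1) & 0x1);
	f.additive_latency = (mr1 >> 3) & 0x3;
	f.dll_enable = mr1 & 0x1;
	return f;
}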
+@@ -183,60 +436,139 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat,
+ static u32 mct_MR0(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+ u32 dword, ret, dword2;
+
+- ret = 0x00000;
+- ret |= MrsChipSel;
+-
+- /* program MrsAddress[1:0]=burst length and control method
+- (BL):based on F2x[1,0]84[BurstCtrl] */
+- dword = Get_NB32(dev, reg_off + 0x84);
+- ret |= dword & 3;
+-
+- /* program MrsAddress[3]=1 (BT):interleaved */
+- ret |= 1 << 3;
+-
+- /* program MrsAddress[6:4,2]=read CAS latency
+- (CL):based on F2x[1,0]88[Tcl] */
+- dword2 = Get_NB32(dev, reg_off + 0x88);
+- ret |= (dword2 & 0x7) << 4; /* F2x88[2:0] to MrsAddress[6:4] */
+- ret |= ((dword2 & 0x8) >> 3) << 2; /* F2x88[3] to MrsAddress[2] */
+-
+- /* program MrsAddress[12]=0 (PPD):slow exit */
+- if (dword & (1 << 23))
+- ret |= 1 << 12;
+-
+- /* program MrsAddress[11:9]=write recovery for auto-precharge
+- (WR):based on F2x[1,0]84[Twr] */
+- ret |= ((dword >> 4) & 7) << 9;
+-
+- /* program MrsAddress[8]=1 (DLL):DLL reset
+- just issue DLL reset at first time */
+- ret |= 1 << 8;
++ if (is_fam15h()) {
++ ret = 0x00000;
++ ret |= (MrsChipSel << 21);
++
++ /* Set defaults */
++ uint8_t ppd = 0;
++ uint8_t wr_ap = 0;
++ uint8_t dll_reset = 1;
++ uint8_t test_mode = 0;
++ uint8_t cas_latency = 0;
++ uint8_t read_burst_type = 1;
++ uint8_t burst_length = 0;
++
++ /* Obtain PchgPDModeSel */
++ dword = Get_NB32_DCT(dev, dct, 0x84);
++ ppd = (dword >> 23) & 0x1;
++
++ /* Obtain Twr */
++ dword = Get_NB32_DCT(dev, dct, 0x22c) & 0x1f;
++
++ /* Calculate wr_ap (Fam15h BKDG v3.14 Table 82) */
++ if (dword == 0x10)
++ wr_ap = 0x0;
++ else if (dword == 0x5)
++ wr_ap = 0x1;
++ else if (dword == 0x6)
++ wr_ap = 0x2;
++ else if (dword == 0x7)
++ wr_ap = 0x3;
++ else if (dword == 0x8)
++ wr_ap = 0x4;
++ else if (dword == 0xa)
++ wr_ap = 0x5;
++ else if (dword == 0xc)
++ wr_ap = 0x6;
++ else if (dword == 0xe)
++ wr_ap = 0x7;
++
++ /* Obtain Tcl */
++ dword = Get_NB32_DCT(dev, dct, 0x200) & 0x1f;
++
++ /* Calculate cas_latency (Fam15h BKDG v3.14 Table 83) */
++ if (dword == 0x5)
++ cas_latency = 0x2;
++ else if (dword == 0x6)
++ cas_latency = 0x4;
++ else if (dword == 0x7)
++ cas_latency = 0x6;
++ else if (dword == 0x8)
++ cas_latency = 0x8;
++ else if (dword == 0x9)
++ cas_latency = 0xa;
++ else if (dword == 0xa)
++ cas_latency = 0xc;
++ else if (dword == 0xb)
++ cas_latency = 0xe;
++ else if (dword == 0xc)
++ cas_latency = 0x1;
++ else if (dword == 0xd)
++ cas_latency = 0x3;
++ else if (dword == 0xe)
++ cas_latency = 0x5;
++ else if (dword == 0xf)
++ cas_latency = 0x7;
++ else if (dword == 0x10)
++ cas_latency = 0x9;
++
++ /* Obtain BurstCtrl */
++ burst_length = Get_NB32_DCT(dev, dct, 0x84) & 0x3;
++
++ /* Load data into MRS word */
++ ret |= (ppd & 0x1) << 12;
++ ret |= (wr_ap & 0x3) << 9;
++ ret |= (dll_reset & 0x1) << 8;
++ ret |= (test_mode & 0x1) << 7;
++ ret |= ((cas_latency & 0xe) >> 1) << 4;
++ ret |= ((cas_latency & 0x1) >> 0) << 2;
++ ret |= (read_burst_type & 0x1) << 3;
++ ret |= (burst_length & 0x3);
++ } else {
++ ret = 0x00000;
++ ret |= (MrsChipSel << 20);
++
++ /* program MrsAddress[1:0]=burst length and control method
++ (BL):based on F2x[1,0]84[BurstCtrl] */
++ dword = Get_NB32_DCT(dev, dct, 0x84);
++ ret |= dword & 3;
++
++ /* program MrsAddress[3]=1 (BT):interleaved */
++ ret |= 1 << 3;
++
++ /* program MrsAddress[6:4,2]=read CAS latency
++ (CL):based on F2x[1,0]88[Tcl] */
++ dword2 = Get_NB32_DCT(dev, dct, 0x88);
++ ret |= (dword2 & 0x7) << 4; /* F2x88[2:0] to MrsAddress[6:4] */
++ ret |= ((dword2 & 0x8) >> 3) << 2; /* F2x88[3] to MrsAddress[2] */
++
++ /* program MrsAddress[12]=0 (PPD):slow exit */
++ if (dword & (1 << 23))
++ ret |= 1 << 12;
++
++ /* program MrsAddress[11:9]=write recovery for auto-precharge
++ (WR):based on F2x[1,0]84[Twr] */
++ ret |= ((dword >> 4) & 7) << 9;
++
++ /* program MrsAddress[8]=1 (DLL):DLL reset
++ just issue DLL reset at first time */
++ ret |= 1 << 8;
++ }
+
+ return ret;
+ }
+
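The long if/else chains above translate D18F2x22C[Twr] and D18F2x200[Tcl] into the MR0 write recovery and CAS latency encodings (BKDG Tables 82 and 83). As a reading aid only, the same translations in table-driven form; indices are the raw register field values handled above, and entries the tables do not cover stay zero:

#include <stdint.h>

/* Twr field -> MR0 write recovery encoding (wr_ap above) */
static const uint8_t fam15_mr0_wr_ap[0x11] = {
	[0x5] = 0x1, [0x6] = 0x2, [0x7] = 0x3, [0x8] = 0x4,
	[0xa] = 0x5, [0xc] = 0x6, [0xe] = 0x7, [0x10] = 0x0,
};

/* Tcl field -> MR0 CAS latency encoding (cas_latency above); the code then
 * splits this value across MrsAddress[6:4] and MrsAddress[2].
 */
static const uint8_t fam15_mr0_cas_latency[0x11] = {
	[0x5] = 0x2, [0x6] = 0x4, [0x7] = 0x6, [0x8] = 0x8,
	[0x9] = 0xa, [0xa] = 0xc, [0xb] = 0xe, [0xc] = 0x1,
	[0xd] = 0x3, [0xe] = 0x5, [0xf] = 0x7, [0x10] = 0x9,
};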
+ static void mct_SendZQCmd(struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+ u32 dword;
+
+ /*1.Program MrsAddress[10]=1
+ 2.Set SendZQCmd=1
+ */
+- dword = Get_NB32(dev, reg_off + 0x7C);
++ dword = Get_NB32_DCT(dev, dct, 0x7C);
+ dword &= ~0xFFFFFF;
+ dword |= 1 << 10;
+ dword |= 1 << SendZQCmd;
+- Set_NB32(dev, reg_off + 0x7C, dword);
++ Set_NB32_DCT(dev, dct, 0x7C, dword);
+
+ /* Wait for SendZQCmd=0 */
+ do {
+- dword = Get_NB32(dev, reg_off + 0x7C);
++ dword = Get_NB32_DCT(dev, dct, 0x7C);
+ } while (dword & (1 << SendZQCmd));
+
+ /* 4.Wait 512 MEMCLKs */
+@@ -248,31 +580,30 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat,
+ {
+ u8 MrsChipSel;
+ u32 dword;
+- u32 reg_off = 0x100 * dct;
+ u32 dev = pDCTstat->dev_dct;
+
+- if (pDCTstat->DIMMAutoSpeed == 4) {
++ if (pDCTstat->DIMMAutoSpeed == mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) {
+ /* 3.Program F2x[1,0]7C[EnDramInit]=1 */
+- dword = Get_NB32(dev, reg_off + 0x7C);
++ dword = Get_NB32_DCT(dev, dct, 0x7c);
+ dword |= 1 << EnDramInit;
+- Set_NB32(dev, reg_off + 0x7C, dword);
++ Set_NB32_DCT(dev, dct, 0x7c, dword);
+ mct_DCTAccessDone(pDCTstat, dct);
+
+ /* 4.wait 200us */
+ mct_Wait(40000);
+
+- /* 5.On revision C processors, program F2x[1, 0]7C[DeassertMemRstX] = 1. */
+- dword = Get_NB32(dev, reg_off + 0x7C);
++ /* 5.Program F2x[1, 0]7C[DeassertMemRstX] = 1. */
++ dword = Get_NB32_DCT(dev, dct, 0x7c);
+ dword |= 1 << DeassertMemRstX;
+- Set_NB32(dev, reg_off + 0x7C, dword);
++ Set_NB32_DCT(dev, dct, 0x7c, dword);
+
+ /* 6.wait 500us */
+ mct_Wait(200000);
+
+ /* 7.Program F2x[1,0]7C[AssertCke]=1 */
+- dword = Get_NB32(dev, reg_off + 0x7C);
++ dword = Get_NB32_DCT(dev, dct, 0x7c);
+ dword |= 1 << AssertCke;
+- Set_NB32(dev, reg_off + 0x7C, dword);
++ Set_NB32_DCT(dev, dct, 0x7c, dword);
+
+ /* 8.wait 360ns */
+ mct_Wait(80);
+@@ -281,6 +612,13 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat,
+ * must be done for each chip select pair */
+ if (pDCTstat->Status & (1 << SB_Registered))
+ mct_DramControlReg_Init_D(pMCTstat, pDCTstat, dct);
++
++ /* The following steps are performed with load reduced DIMMs only and
++ * must be done for each DIMM */
++ // if (pDCTstat->Status & (1 << SB_LoadReduced))
++ /* TODO
++ * Implement LRDIMM configuration
++ */
+ }
+
+ /* The following steps are performed once for unbuffered DIMMs and once for each
+@@ -289,23 +627,23 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat,
+ if (pDCTstat->CSPresent & (1 << MrsChipSel)) {
+ u32 EMRS;
+ /* 13.Send EMRS(2) */
+- EMRS = mct_MR2(pMCTstat, pDCTstat, dct, MrsChipSel << 20);
++ EMRS = mct_MR2(pMCTstat, pDCTstat, dct, MrsChipSel);
+ EMRS = swapAddrBits(pDCTstat, EMRS, MrsChipSel, dct);
+ mct_SendMrsCmd(pDCTstat, dct, EMRS);
+ /* 14.Send EMRS(3). Ordinarily at this time, MrsAddress[2:0]=000b */
+- EMRS= mct_MR3(pMCTstat, pDCTstat, dct, MrsChipSel << 20);
++ EMRS= mct_MR3(pMCTstat, pDCTstat, dct, MrsChipSel);
+ EMRS = swapAddrBits(pDCTstat, EMRS, MrsChipSel, dct);
+ mct_SendMrsCmd(pDCTstat, dct, EMRS);
+ /* 15.Send EMRS(1) */
+- EMRS= mct_MR1(pMCTstat, pDCTstat, dct, MrsChipSel << 20);
++ EMRS= mct_MR1(pMCTstat, pDCTstat, dct, MrsChipSel);
+ EMRS = swapAddrBits(pDCTstat, EMRS, MrsChipSel, dct);
+ mct_SendMrsCmd(pDCTstat, dct, EMRS);
+ /* 16.Send MRS with MrsAddress[8]=1(reset the DLL) */
+- EMRS= mct_MR0(pMCTstat, pDCTstat, dct, MrsChipSel << 20);
++ EMRS= mct_MR0(pMCTstat, pDCTstat, dct, MrsChipSel);
+ EMRS = swapAddrBits(pDCTstat, EMRS, MrsChipSel, dct);
+ mct_SendMrsCmd(pDCTstat, dct, EMRS);
+
+- if (pDCTstat->DIMMAutoSpeed == 4)
++ if (pDCTstat->DIMMAutoSpeed == mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK)))
+ if (!(pDCTstat->Status & (1 << SB_Registered)))
+ break; /* For UDIMM, only send MR commands once per channel */
+ }
+@@ -314,16 +652,15 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat,
+ MrsChipSel ++;
+ }
+
+- mct_Wait(100000);
+-
+- if (pDCTstat->DIMMAutoSpeed == 4) {
++ if (pDCTstat->DIMMAutoSpeed == mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) {
+ /* 17.Send two ZQCL commands */
+ mct_SendZQCmd(pDCTstat, dct);
+ mct_SendZQCmd(pDCTstat, dct);
++
+ /* 18.Program F2x[1,0]7C[EnDramInit]=0 */
+- dword = Get_NB32(dev, reg_off + 0x7C);
++ dword = Get_NB32_DCT(dev, dct, 0x7C);
+ dword &= ~(1 << EnDramInit);
+- Set_NB32(dev, reg_off + 0x7C, dword);
++ Set_NB32_DCT(dev, dct, 0x7C, dword);
+ mct_DCTAccessDone(pDCTstat, dct);
+ }
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+index 91e8f77..011a94f 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+@@ -23,7 +23,10 @@
+ Description: Receiver En and DQS Timing Training feature for DDR 3 MCT
+ ******************************************************************************/
+
+-static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
++static int32_t abs(int32_t val);
++static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 Pass);
++static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Pass);
+ static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+@@ -32,7 +35,7 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Channel);
+ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly);
+-static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
++static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
+ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
+
+@@ -89,11 +92,154 @@ static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
+ void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Pass)
+ {
+- if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
+- dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
++ if(mct_checkNumberOfDqsRcvEn_1Pass(Pass)) {
++ if (is_fam15h())
++ dqsTrainRcvrEn_SW_Fam15(pMCTstat, pDCTstat, Pass);
++ else
++ dqsTrainRcvrEn_SW_Fam10(pMCTstat, pDCTstat, Pass);
++ }
+ }
+
+-static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
++static uint16_t fam15_receiver_enable_training_seed(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type)
++{
++ uint32_t dword;
++ uint16_t seed = 0;
++
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ uint8_t channel = dct;
++ if (package_type == PT_GR) {
++ /* Get the internal node number */
++ dword = Get_NB32(pDCTstat->dev_nbmisc, 0xe8);
++ dword = (dword >> 30) & 0x3;
++ if (dword == 1) {
++ channel += 2;
++ }
++ }
++
++ if (pDCTstat->Status & (1 << SB_Registered)) {
++ if (package_type == PT_GR) {
++ /* Socket G34: Fam15h BKDG v3.14 Table 99 */
++ if (MaxDimmsInstallable == 1) {
++ if (channel == 0)
++ seed = 0x43;
++ else if (channel == 1)
++ seed = 0x3f;
++ else if (channel == 2)
++ seed = 0x3a;
++ else if (channel == 3)
++ seed = 0x35;
++ } else if (MaxDimmsInstallable == 2) {
++ if (channel == 0)
++ seed = 0x54;
++ else if (channel == 1)
++ seed = 0x4d;
++ else if (channel == 2)
++ seed = 0x45;
++ else if (channel == 3)
++ seed = 0x40;
++ } else if (MaxDimmsInstallable == 3) {
++ if (channel == 0)
++ seed = 0x6b;
++ else if (channel == 1)
++ seed = 0x5e;
++ else if (channel == 2)
++ seed = 0x4b;
++ else if (channel == 3)
++ seed = 0x3d;
++ }
++ } else if (package_type == PT_C3) {
++ /* Socket C32: Fam15h BKDG v3.14 Table 100 */
++ if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) {
++ if (channel == 0)
++ seed = 0x3f;
++ else if (channel == 1)
++ seed = 0x3e;
++ } else if (MaxDimmsInstallable == 3) {
++ if (channel == 0)
++ seed = 0x47;
++ else if (channel == 1)
++ seed = 0x38;
++ }
++ }
++ } else if (pDCTstat->Status & (1 << SB_LoadReduced)) {
++ if (package_type == PT_GR) {
++ /* Socket G34: Fam15h BKDG v3.14 Table 99 */
++ if (MaxDimmsInstallable == 1) {
++ if (channel == 0)
++ seed = 0x123;
++ else if (channel == 1)
++ seed = 0x122;
++ else if (channel == 2)
++ seed = 0x112;
++ else if (channel == 3)
++ seed = 0x102;
++ }
++ } else if (package_type == PT_C3) {
++ /* Socket C32: Fam15h BKDG v3.14 Table 100 */
++ if (channel == 0)
++ seed = 0x132;
++ else if (channel == 1)
++ seed = 0x122;
++ }
++ } else {
++ if (package_type == PT_GR) {
++ /* Socket G34: Fam15h BKDG v3.14 Table 99 */
++ if (MaxDimmsInstallable == 1) {
++ if (channel == 0)
++ seed = 0x3e;
++ else if (channel == 1)
++ seed = 0x38;
++ else if (channel == 2)
++ seed = 0x37;
++ else if (channel == 3)
++ seed = 0x31;
++ } else if (MaxDimmsInstallable == 2) {
++ if (channel == 0)
++ seed = 0x51;
++ else if (channel == 1)
++ seed = 0x4a;
++ else if (channel == 2)
++ seed = 0x46;
++ else if (channel == 3)
++ seed = 0x3f;
++ } else if (MaxDimmsInstallable == 3) {
++ if (channel == 0)
++ seed = 0x5e;
++ else if (channel == 1)
++ seed = 0x52;
++ else if (channel == 2)
++ seed = 0x48;
++ else if (channel == 3)
++ seed = 0x3c;
++ }
++ } else if (package_type == PT_C3) {
++ /* Socket C32: Fam15h BKDG v3.14 Table 100 */
++ if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) {
++ if (channel == 0)
++ seed = 0x39;
++ else if (channel == 1)
++ seed = 0x32;
++ } else if (MaxDimmsInstallable == 3) {
++ if (channel == 0)
++ seed = 0x45;
++ else if (channel == 1)
++ seed = 0x37;
++ }
++ } else if (package_type == PT_M2) {
++ /* Socket AM3: Fam15h BKDG v3.14 Table 101 */
++ seed = 0x3a;
++ }
++ }
++
++ return seed;
++}
++
++static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
+ {
+ uint8_t lane;
+ uint32_t dword;
+@@ -111,7 +257,7 @@ static void read_dqs_write_timing_control_registers(uint16_t* current_total_dela
+ if (lane == 8)
+ wdt_reg = 0x32;
+ wdt_reg += dimm * 3;
+- dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg);
+ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1))
+ current_total_delay[lane] = (dword & 0x00ff0000) >> 16;
+ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0))
+@@ -119,12 +265,124 @@ static void read_dqs_write_timing_control_registers(uint16_t* current_total_dela
+ }
+ }
+
+-static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
++#ifdef UNUSED_CODE
++static void write_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
++{
++ uint8_t lane;
++ uint32_t dword;
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint32_t ret_reg;
++ if ((lane == 0) || (lane == 1))
++ ret_reg = 0x30;
++ if ((lane == 2) || (lane == 3))
++ ret_reg = 0x31;
++ if ((lane == 4) || (lane == 5))
++ ret_reg = 0x40;
++ if ((lane == 6) || (lane == 7))
++ ret_reg = 0x41;
++ if (lane == 8)
++ ret_reg = 0x32;
++ ret_reg += dimm * 3;
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg);
++ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
++ dword &= ~(0xff << 16);
++ dword |= (current_total_delay[lane] & 0xff) << 16;
++ }
++ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
++ dword &= ~0xff;
++ dword |= current_total_delay[lane] & 0xff;
++ }
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg, dword);
++ }
++}
++#endif
++
++static void write_write_data_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
++{
++ uint8_t lane;
++ uint32_t dword;
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint32_t wdt_reg;
++
++ /* Calculate Write Data Timing register location */
++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
++ wdt_reg = 0x1;
++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
++ wdt_reg = 0x2;
++ if (lane == 8)
++ wdt_reg = 0x3;
++ wdt_reg |= (dimm << 8);
++
++ /* Set Write Data Timing register values */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg);
++ if ((lane == 7) || (lane == 3)) {
++ dword &= ~(0x7f << 24);
++ dword |= (current_total_delay[lane] & 0x7f) << 24;
++ }
++ if ((lane == 6) || (lane == 2)) {
++ dword &= ~(0x7f << 16);
++ dword |= (current_total_delay[lane] & 0x7f) << 16;
++ }
++ if ((lane == 5) || (lane == 1)) {
++ dword &= ~(0x7f << 8);
++ dword |= (current_total_delay[lane] & 0x7f) << 8;
++ }
++ if ((lane == 8) || (lane == 4) || (lane == 0)) {
++ dword &= ~0x7f;
++ dword |= current_total_delay[lane] & 0x7f;
++ }
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg, dword);
++ }
++}
++
++static void read_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
++{
++ uint8_t lane;
++ uint32_t mask;
++ uint32_t dword;
++
++ if (is_fam15h())
++ mask = 0x3ff;
++ else
++ mask = 0x1ff;
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint32_t ret_reg;
++ if ((lane == 0) || (lane == 1))
++ ret_reg = 0x10;
++ if ((lane == 2) || (lane == 3))
++ ret_reg = 0x11;
++ if ((lane == 4) || (lane == 5))
++ ret_reg = 0x20;
++ if ((lane == 6) || (lane == 7))
++ ret_reg = 0x21;
++ if (lane == 8)
++ ret_reg = 0x12;
++ ret_reg += dimm * 3;
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg);
++ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
++ current_total_delay[lane] = (dword & (mask << 16)) >> 16;
++ }
++ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
++ current_total_delay[lane] = dword & mask;
++ }
++ }
++}
++
++static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
+ {
+ uint8_t lane;
++ uint32_t mask;
+ uint32_t dword;
+
+- for (lane = 0; lane < 8; lane++) {
++ if (is_fam15h())
++ mask = 0x3ff;
++ else
++ mask = 0x1ff;
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+ uint32_t ret_reg;
+ if ((lane == 0) || (lane == 1))
+ ret_reg = 0x10;
+@@ -134,17 +392,125 @@ static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_
+ ret_reg = 0x20;
+ if ((lane == 6) || (lane == 7))
+ ret_reg = 0x21;
++ if (lane == 8)
++ ret_reg = 0x12;
+ ret_reg += dimm * 3;
+- dword = Get_NB32_index_wait(dev, index_reg, ret_reg);
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg);
+ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
+- dword &= ~(0x1ff << 16);
+- dword |= (current_total_delay[lane] & 0x1ff) << 16;
++ dword &= ~(mask << 16);
++ dword |= (current_total_delay[lane] & mask) << 16;
+ }
+- if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
+- dword &= ~0x1ff;
+- dword |= current_total_delay[lane] & 0x1ff;
++ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
++ dword &= ~mask;
++ dword |= current_total_delay[lane] & mask;
+ }
+- Set_NB32_index_wait(dev, index_reg, ret_reg, dword);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg, dword);
++ }
++}
++
++static void read_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
++{
++ uint8_t lane;
++ uint32_t dword;
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint32_t prc_reg;
++
++ /* Calculate DRAM Phase Recovery Control register location */
++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
++ prc_reg = 0x50;
++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
++ prc_reg = 0x51;
++ if (lane == 8)
++ prc_reg = 0x52;
++
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg);
++ if ((lane == 7) || (lane == 3)) {
++ current_total_delay[lane] = (dword >> 24) & 0x7f;
++ }
++ if ((lane == 6) || (lane == 2)) {
++ current_total_delay[lane] = (dword >> 16) & 0x7f;
++ }
++ if ((lane == 5) || (lane == 1)) {
++ current_total_delay[lane] = (dword >> 8) & 0x7f;
++ }
++ if ((lane == 8) || (lane == 4) || (lane == 0)) {
++ current_total_delay[lane] = dword & 0x7f;
++ }
++ }
++}
++
++static void write_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
++{
++ uint8_t lane;
++ uint32_t dword;
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint32_t prc_reg;
++
++ /* Calculate DRAM Phase Recovery Control register location */
++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
++ prc_reg = 0x50;
++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
++ prc_reg = 0x51;
++ if (lane == 8)
++ prc_reg = 0x52;
++
++ /* Set DRAM Phase Recovery Control register values */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg);
++ if ((lane == 7) || (lane == 3)) {
++ dword &= ~(0x7f << 24);
++ dword |= (current_total_delay[lane] & 0x7f) << 24;
++ }
++ if ((lane == 6) || (lane == 2)) {
++ dword &= ~(0x7f << 16);
++ dword |= (current_total_delay[lane] & 0x7f) << 16;
++ }
++ if ((lane == 5) || (lane == 1)) {
++ dword &= ~(0x7f << 8);
++ dword |= (current_total_delay[lane] & 0x7f) << 8;
++ }
++ if ((lane == 8) || (lane == 4) || (lane == 0)) {
++ dword &= ~0x7f;
++ dword |= current_total_delay[lane] & 0x7f;
++ }
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg, dword);
++ }
++}
++
++static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg)
++{
++ uint8_t lane;
++ uint32_t dword;
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint32_t rdt_reg;
++
++ /* Calculate DRAM Read DQS Timing register location */
++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
++ rdt_reg = 0x5;
++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
++ rdt_reg = 0x6;
++ if (lane == 8)
++ rdt_reg = 0x7;
++ rdt_reg |= (dimm << 8);
++
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, rdt_reg);
++ if ((lane == 7) || (lane == 3)) {
++ current_total_delay[lane] = (dword >> 24) & 0x3f;
++ }
++ if ((lane == 6) || (lane == 2)) {
++ current_total_delay[lane] = (dword >> 16) & 0x3f;
++ }
++ if ((lane == 5) || (lane == 1)) {
++ current_total_delay[lane] = (dword >> 8) & 0x3f;
++ }
++ if ((lane == 8) || (lane == 4) || (lane == 0)) {
++ current_total_delay[lane] = dword & 0x3f;
++ }
++
++ if (is_fam15h())
++ current_total_delay[lane] >>= 1;
+ }
+ }
+
+@@ -160,10 +526,11 @@ static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDC
+ return testaddr;
+ }
+
+-/* DQS Receiver Enable Training
+- * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2
++/* DQS Receiver Enable Training (Family 10h)
++ * Algorithm detailed in:
++ * The Fam10h BKDG Rev. 3.62 section 2.8.9.9.2
+ */
+-static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
++static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Pass)
+ {
+ u8 Channel;
+@@ -171,7 +538,6 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ u8 Addl_Index = 0;
+ u8 Receiver;
+ u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
+- u8 Final_Value;
+ u16 CTLRMaxDelay;
+ u16 MaxDelay_CH[2];
+ u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
+@@ -188,6 +554,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ u32 lo, hi;
+
+ uint32_t dword;
++ uint8_t dimm;
+ uint8_t rank;
+ uint8_t lane;
+ uint16_t current_total_delay[MAX_BYTE_LANES];
+@@ -214,14 +581,13 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ }
+
+ for (ch = ch_start; ch < ch_end; ch++) {
+- reg = 0x78 + (0x100 * ch);
+- val = Get_NB32(dev, reg);
++ reg = 0x78;
++ val = Get_NB32_DCT(dev, ch, reg);
+ val &= ~(0x3ff << 22);
+- val |= (0x0c8 << 22); /* Max Rd Lat */
+- Set_NB32(dev, reg, val);
++ val |= (0x0c8 << 22); /* MaxRdLatency = 0xc8 */
++ Set_NB32_DCT(dev, ch, reg, val);
+ }
+
+- Final_Value = 1;
+ if (Pass == FirstPass) {
+ mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
+ } else {
+@@ -260,7 +626,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+
+ CTLRMaxDelay = 0;
+ MaxDelay_CH[Channel] = 0;
+- index_reg = 0x98 + 0x100 * Channel;
++ index_reg = 0x98;
+
+ Receiver = mct_InitReceiver_D(pDCTstat, Channel);
+ /* There are four receiver pairs, loosely associated with chipselects.
+@@ -268,6 +634,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ */
+ for (; Receiver < 8; Receiver += 2) {
+ Addl_Index = (Receiver >> 1) * 3 + 0x10;
++ dimm = (Receiver >> 1);
+
+ print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
+
+@@ -284,45 +651,14 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ /* 2.8.9.9.2 (1, 6)
+ * Retrieve gross and fine timing fields from write DQS registers
+ */
+- read_dqs_write_timing_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+
+ /* 2.8.9.9.2 (1)
+ * Program the Write Data Timing and Write ECC Timing register to
+ * the values stored in the DQS Write Timing Control register
+ * for each lane
+ */
+- for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+- uint32_t wdt_reg;
+-
+- /* Calculate Write Data Timing register location */
+- if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
+- wdt_reg = 0x1;
+- if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
+- wdt_reg = 0x2;
+- if (lane == 8)
+- wdt_reg = 0x3;
+- wdt_reg |= ((Receiver / 2) << 8);
+-
+- /* Set Write Data Timing register values */
+- dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
+- if ((lane == 7) || (lane == 3)) {
+- dword &= ~(0x7f << 24);
+- dword |= (current_total_delay[lane] & 0x7f) << 24;
+- }
+- if ((lane == 6) || (lane == 2)) {
+- dword &= ~(0x7f << 16);
+- dword |= (current_total_delay[lane] & 0x7f) << 16;
+- }
+- if ((lane == 5) || (lane == 1)) {
+- dword &= ~(0x7f << 8);
+- dword |= (current_total_delay[lane] & 0x7f) << 8;
+- }
+- if ((lane == 8) || (lane == 4) || (lane == 0)) {
+- dword &= ~0x7f;
+- dword |= current_total_delay[lane] & 0x7f;
+- }
+- Set_NB32_index_wait(dev, index_reg, wdt_reg, dword);
+- }
++ write_write_data_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+
+ /* 2.8.9.9.2 (2)
+ * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers
+@@ -336,12 +672,12 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ rdt_reg = 0x6;
+ if (lane == 8)
+ rdt_reg = 0x7;
+- rdt_reg |= ((Receiver / 2) << 8);
++ rdt_reg |= (dimm << 8);
+ if (lane == 8)
+ dword = 0x0000003f;
+ else
+ dword = 0x3f3f3f3f;
+- Set_NB32_index_wait(dev, index_reg, rdt_reg, dword);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, rdt_reg, dword);
+ }
+
+ /* 2.8.9.9.2 (3)
+@@ -371,7 +707,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
+
+ /* 2.8.9.9.2 (4, 5)
+- * Write 1 cache line of the appropriate test pattern to each test addresse
++ * Write 1 cache line of the appropriate test pattern to each test address
+ */
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */
+@@ -390,7 +726,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ /* 2.8.9.9.2 (6)
+ * Write gross and fine timing fields to read DQS registers
+ */
+- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+
+ /* 2.8.9.9.2 (7)
+ * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values
+@@ -417,8 +753,8 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ break;
+
+ /* 2.8.9.9.2 (7 A)
+- * Loop over all ranks
+- */
++ * Loop over all ranks
++ */
+ for (rank = 0; rank < (_2Ranks + 1); rank++) {
+ /* 2.8.9.9.2 (7 A a-d)
+ * Read the first test address of the current rank
+@@ -434,17 +770,17 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ */
+ proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
+ result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
+- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+ proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
+ result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
+- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+ } else {
+ proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
+ result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
+- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+ proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
+ result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
+- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+ }
+ /* 2.8.9.9.2 (7 A e)
+ * Compare both read patterns and flag passing ranks/lanes
+@@ -533,7 +869,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ }
+
+ /* Update delays in hardware */
+- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+
+ /* Save previous results for comparison in the next iteration */
+ for (lane = 0; lane < 8; lane++)
+@@ -587,7 +923,483 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
+ }
+
+- ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
++ for (Channel = 0; Channel < 2; Channel++) {
++ ResetDCTWrPtr_D(dev, Channel, index_reg, Addl_Index);
++ }
++
++ if(_DisableDramECC) {
++ mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
++ }
++
++ if (Pass == FirstPass) {
++ /*Disable DQSRcvrEn training mode */
++ mct_DisableDQSRcvEn_D(pDCTstat);
++ }
++
++ if(!_Wrap32Dis) {
++ msr = HWCR;
++ _RDMSR(msr, &lo, &hi);
++ lo &= ~(1<<17); /* restore HWCR.wrap32dis */
++ _WRMSR(msr, lo, hi);
++ }
++ if(!_SSE2){
++ cr4 = read_cr4();
++ cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
++ write_cr4(cr4);
++ }
++
++#if DQS_TRAIN_DEBUG > 0
++ {
++ u8 ChannelDTD;
++ printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
++ for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
++ printk(BIOS_DEBUG, "Channel:%x: %x\n",
++ ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
++ }
++ }
++#endif
++
++#if DQS_TRAIN_DEBUG > 0
++ {
++ u16 valDTD;
++ u8 ChannelDTD, ReceiverDTD;
++ u8 i;
++ u16 *p;
++
++ printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
++ for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
++ printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD);
++ for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) {
++ printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD);
++ p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1];
++ for (i=0;i<8; i++) {
++ valDTD = p[i];
++ printk(BIOS_DEBUG, " %03x", valDTD);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ }
++ }
++#endif
++
++ printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status);
++ printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus);
++ printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode);
++ printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
++}
++
++/* DQS Receiver Enable Training Pattern Generation (Family 15h)
++ * Algorithm detailed in:
++ * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.2 (4)
++ */
++static void generate_dram_receiver_enable_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver)
++{
++ uint32_t dword;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ /* 2.10.5.7.1.1
++ * It appears that the DCT only supports 8-beat burst length mode,
++ * so do nothing here...
++ */
++
++ /* Wait for CmdSendInProg == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ } while (dword & (0x1 << 12));
++
++ /* Set CmdTestEnable = 1 */
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword |= (0x1 << 2);
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++
++ /* 2.10.5.8.6.1.1 Send Activate Command */
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */
++ dword |= ((0x1 << Receiver) << 22);
++ dword &= ~(0x7 << 19); /* CmdBank = 0 */
++ dword &= ~(0x3ffff); /* CmdAddress = 0 */
++ dword |= (0x1 << 31); /* SendActCmd = 1 */
++ Set_NB32_DCT(dev, dct, 0x28c, dword);
++
++ /* Wait for SendActCmd == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ } while (dword & (0x1 << 31));
++
++ /* Wait 75 MEMCLKs. */
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 75);
++
++ /* 2.10.5.8.6.1.2 */
++ Set_NB32_DCT(dev, dct, 0x274, 0x0); /* DQMask = 0 */
++ Set_NB32_DCT(dev, dct, 0x278, 0x0);
++
++ dword = Get_NB32_DCT(dev, dct, 0x27c);
++ dword &= ~(0xff); /* EccMask = 0 */
++ if (pDCTstat->DimmECCPresent == 0)
++ dword |= 0xff; /* EccMask = 0xff */
++ Set_NB32_DCT(dev, dct, 0x27c, dword);
++
++ /* 2.10.5.8.6.1.2 */
++ dword = Get_NB32_DCT(dev, dct, 0x270);
++ dword &= ~(0x7ffff); /* DataPrbsSeed = 55555 */
++// dword |= (0x55555);
++ dword |= (0x44443); /* Use AGESA seed */
++ Set_NB32_DCT(dev, dct, 0x270, dword);
++
++ /* 2.10.5.8.2 (4) */
++ dword = Get_NB32_DCT(dev, dct, 0x260);
++ dword &= ~(0x1fffff); /* CmdCount = 192 */
++ dword |= 192;
++ Set_NB32_DCT(dev, dct, 0x260, dword);
++
++#if 0
++ /* TODO: This applies to Fam15h model 10h and above only */
++ /* Program Bubble Count and CmdStreamLen */
++ dword = Get_NB32_DCT(dev, dct, 0x25c);
++ dword &= ~(0x3ff << 12); /* BubbleCnt = 0 */
++ dword &= ~(0x3ff << 22); /* BubbleCnt2 = 0 */
++ dword &= ~(0xff); /* CmdStreamLen = 1 */
++ dword |= 0x1;
++ Set_NB32_DCT(dev, dct, 0x25c, dword);
++#endif
++
++ /* Configure Target A */
++ dword = Get_NB32_DCT(dev, dct, 0x254);
++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */
++ dword |= (Receiver & 0x7) << 24;
++ dword &= ~(0x7 << 21); /* TgtBank = 0 */
++ dword &= ~(0x3ff); /* TgtAddress = 0 */
++ Set_NB32_DCT(dev, dct, 0x254, dword);
++
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword |= (0x1 << 3); /* ResetAllErr = 1 */
++ dword &= ~(0x1 << 4); /* StopOnErr = 0 */
++ dword &= ~(0x3 << 8); /* CmdTgt = 0 (Target A) */
++ dword &= ~(0x7 << 5); /* CmdType = 0 (Read) */
++ dword |= (0x1 << 11); /* SendCmd = 1 */
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++
++ /* 2.10.5.8.6.1.2 Wait for TestStatus == 1 and CmdSendInProg == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ } while ((dword & (0x1 << 12)) || (!(dword & (0x1 << 10))));
++
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword &= ~(0x1 << 11); /* SendCmd = 0 */
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++
++ /* 2.10.5.8.6.1.1 Send Precharge Command */
++ /* Wait 25 MEMCLKs. */
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25);
++
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */
++ dword |= ((0x1 << Receiver) << 22);
++ dword &= ~(0x7 << 19); /* CmdBank = 0 */
++ dword &= ~(0x3ffff); /* CmdAddress = 0x400 */
++ dword |= 0x400;
++ dword |= (0x1 << 30); /* SendPchgCmd = 1 */
++ Set_NB32_DCT(dev, dct, 0x28c, dword);
++
++ /* Wait for SendPchgCmd == 0 */
++ do {
++ dword = Get_NB32_DCT(dev, dct, 0x28c);
++ } while (dword & (0x1 << 30));
++
++ /* Wait 25 MEMCLKs. */
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25);
++
++ /* Set CmdTestEnable = 0 */
++ dword = Get_NB32_DCT(dev, dct, 0x250);
++ dword &= ~(0x1 << 2);
++ Set_NB32_DCT(dev, dct, 0x250, dword);
++}
++
++/* DQS Receiver Enable Training (Family 15h)
++ * Algorithm detailed in:
++ * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.2
++ * This algorithm runs once at the lowest supported MEMCLK,
++ * then once again at the highest supported MEMCLK.
++ */
++static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
++ struct DCTStatStruc *pDCTstat, u8 Pass)
++{
++ u8 Channel;
++ u8 _2Ranks;
++ u8 Addl_Index = 0;
++ u8 Receiver;
++ u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
++ u32 Errors;
++
++ u32 val;
++ u32 dev;
++ u32 index_reg;
++ u32 ch_start, ch_end, ch;
++ u32 msr;
++ u32 cr4;
++ u32 lo, hi;
++
++ uint32_t dword;
++ uint8_t dimm;
++ uint8_t rank;
++ uint8_t lane;
++ uint8_t mem_clk;
++ uint16_t initial_seed;
++ uint16_t current_total_delay[MAX_BYTE_LANES];
++ uint16_t dqs_ret_pass1_total_delay[MAX_BYTE_LANES];
++ uint16_t rank0_current_total_delay[MAX_BYTE_LANES];
++ uint16_t phase_recovery_delays[MAX_BYTE_LANES];
++ uint16_t seed[MAX_BYTE_LANES];
++ uint16_t seed_gross[MAX_BYTE_LANES];
++ uint16_t seed_fine[MAX_BYTE_LANES];
++ uint16_t seed_pre_gross[MAX_BYTE_LANES];
++
++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
++
++ print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
++ print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
++
++ dev = pDCTstat->dev_dct;
++ index_reg = 0x98;
++ ch_start = 0;
++ ch_end = 2;
++
++ for (ch = ch_start; ch < ch_end; ch++) {
++ uint8_t max_rd_latency = 0x55;
++ uint8_t p_state;
++
++ /* 2.10.5.6 */
++ fam15EnableTrainingMode(pMCTstat, pDCTstat, ch, 1);
++
++ /* 2.10.5.2 */
++ for (p_state = 0; p_state < 3; p_state++) {
++ val = Get_NB32_DCT_NBPstate(dev, ch, p_state, 0x210);
++ val &= ~(0x3ff << 22); /* MaxRdLatency = max_rd_latency */
++ val |= (max_rd_latency & 0x3ff) << 22;
++ Set_NB32_DCT_NBPstate(dev, ch, p_state, 0x210, val);
++ }
++ }
++
++ if (Pass != FirstPass) {
++ pDCTstat->DimmTrainFail = 0;
++ pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
++ }
++
++ cr4 = read_cr4();
++ if(cr4 & ( 1 << 9)) { /* save the old value */
++ _SSE2 = 1;
++ }
++ cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
++ write_cr4(cr4);
++
++ msr = HWCR;
++ _RDMSR(msr, &lo, &hi);
++ /* FIXME: Why use SSEDIS */
++ if(lo & (1 << 17)) { /* save the old value */
++ _Wrap32Dis = 1;
++ }
++ lo |= (1 << 17); /* HWCR.wrap32dis */
++ lo &= ~(1 << 15); /* SSEDIS */
++ _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */
++
++ _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
++
++ Errors = 0;
++ dev = pDCTstat->dev_dct;
++
++ for (Channel = 0; Channel < 2; Channel++) {
++ print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
++ print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
++ pDCTstat->Channel = Channel;
++
++ mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f;
++
++ Receiver = mct_InitReceiver_D(pDCTstat, Channel);
++ /* There are four receiver pairs, loosely associated with chipselects.
++ * This is essentially looping over each DIMM.
++ */
++ for (; Receiver < 8; Receiver += 2) {
++ Addl_Index = (Receiver >> 1) * 3 + 0x10;
++ dimm = (Receiver >> 1);
++
++ print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
++
++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
++ continue;
++ }
++
++ /* Retrieve the total delay values from pass 1 of DQS receiver enable training */
++ if (Pass != FirstPass) {
++ read_dqs_receiver_enable_control_registers(dqs_ret_pass1_total_delay, dev, Channel, dimm, index_reg);
++ }
++
++ /* 2.10.5.8.2
++ * Loop over all ranks
++ */
++ if (mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1))
++ _2Ranks = 1;
++ else
++ _2Ranks = 0;
++ for (rank = 0; rank < (_2Ranks + 1); rank++) {
++ /* 2.10.5.8.2 (1)
++ * Specify the target DIMM to be trained
++ * Set TrNibbleSel = 0
++ *
++ * TODO: Add support for x4 DIMMs
++ */
++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
++ dword &= ~(0x3 << 4); /* TrDimmSel */
++ dword |= ((dimm & 0x3) << 4);
++ dword &= ~(0x1 << 2); /* TrNibbleSel */
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
++
++ /* 2.10.5.8.2 (2)
++ * Retrieve gross and fine timing fields from write DQS registers
++ */
++ read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
++
++ /* 2.10.5.8.2.1
++ * Generate the DQS Receiver Enable Training Seed Values
++ */
++ if (Pass == FirstPass) {
++ initial_seed = fam15_receiver_enable_training_seed(pDCTstat, Channel, dimm, rank, package_type);
++
++ /* Adjust seed for the minimum platform supported frequency */
++ initial_seed = (uint16_t) (((((uint64_t) initial_seed) *
++ fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint16_t wl_pass1_delay;
++ wl_pass1_delay = current_total_delay[lane];
++
++ seed[lane] = initial_seed + wl_pass1_delay;
++ }
++ } else {
++ uint8_t addr_prelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
++ uint16_t register_delay;
++ int16_t seed_prescaling;
++
++ memcpy(current_total_delay, dqs_ret_pass1_total_delay, sizeof(current_total_delay));
++ if ((pDCTstat->Status & (1 << SB_Registered))) {
++ if (addr_prelaunch)
++ register_delay = 0x30;
++ else
++ register_delay = 0x20;
++ } else if ((pDCTstat->Status & (1 << SB_LoadReduced))) {
++ /* TODO
++ * Load reduced DIMM support unimplemented
++ */
++ register_delay = 0x0;
++ } else {
++ register_delay = 0x0;
++ }
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ seed_prescaling = current_total_delay[lane] - register_delay - 0x20;
++ seed[lane] = (uint16_t) (register_delay + ((((uint64_t) seed_prescaling) * fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
++ }
++ }
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ seed_gross[lane] = (seed[lane] >> 5) & 0x1f;
++ seed_fine[lane] = seed[lane] & 0x1f;
++
++ /*if (seed_gross[lane] == 0)
++ seed_pre_gross[lane] = 0;
++ else */if (seed_gross[lane] & 0x1)
++ seed_pre_gross[lane] = 1;
++ else
++ seed_pre_gross[lane] = 2;
++
++ /* Calculate phase recovery delays */
++ phase_recovery_delays[lane] = ((seed_pre_gross[lane] & 0x1f) << 5) | (seed_fine[lane] & 0x1f);
++
++ /* Set the gross delay.
++ * NOTE: While the BKDG states to only program DqsRcvEnGrossDelay, this appears
++ * to have been a misprint as DqsRcvEnFineDelay should be set to zero as well.
++ */
++ current_total_delay[lane] = ((seed_gross[lane] & 0x1f) << 5);
++ }
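++ /* Worked example (numbers assumed for illustration only): if the scaled
++  * seed plus the pass-1 write-levelling delay comes to 0xa8 for a lane,
++  * then seed_gross = 0xa8 >> 5 = 5 and seed_fine = 0xa8 & 0x1f = 8; the odd
++  * gross value gives seed_pre_gross = 1, so the phase recovery seed becomes
++  * (1 << 5) | 8 = 0x28 and the programmed DqsRcvEn delay is 5 << 5 = 0xa0.
++  */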
++
++ /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (5 6)
++ * Program PhRecFineDly and PhRecGrossDly
++ */
++ write_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);
++
++ /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (7)
++ * Program the DQS Receiver Enable delay values for each lane
++ */
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
++
++ /* 2.10.5.8.2 (3)
++ * Program DqsRcvTrEn = 1
++ */
++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
++ dword |= (0x1 << 13);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
++
++ /* 2.10.5.8.2 (4)
++ * Issue 192 read requests to the target rank
++ */
++ generate_dram_receiver_enable_training_pattern_fam15(pMCTstat, pDCTstat, Channel, Receiver + (rank & 0x1));
++
++ /* 2.10.5.8.2 (5)
++ * Program DqsRcvTrEn = 0
++ */
++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008);
++ dword &= ~(0x1 << 13);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword);
++
++ /* 2.10.5.8.2 (6)
++ * Read PhRecGrossDly, PhRecFineDly
++ */
++ read_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg);
++
++ /* 2.10.5.8.2 (7)
++ * Calculate and program the DQS Receiver Enable delay values
++ */
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ current_total_delay[lane] = (phase_recovery_delays[lane] & 0x1f);
++ current_total_delay[lane] |= ((seed_gross[lane] + ((phase_recovery_delays[lane] >> 5) & 0x1f) - seed_pre_gross[lane] + 1) << 5);
++ if (lane == 8)
++ pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane];
++ else
++ pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane];
++ }
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
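++ /* Continuing the illustrative numbers above: if the phase recovery engine
++  * reads back 0x4b (gross 2, fine 0x0b) for that lane, the final delay is
++  * (0x4b & 0x1f) | ((5 + 2 - 1 + 1) << 5) = 0x0b | 0xe0 = 0xeb.
++  */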
++
++ if (rank == 0) {
++ /* Back up the Rank 0 delays for later use */
++ memcpy(rank0_current_total_delay, current_total_delay, sizeof(current_total_delay));
++ }
++
++ if (rank == 1) {
++ /* 2.10.5.8.2 (8)
++ * Compute the average delay across both ranks and program the result into
++ * the DQS Receiver Enable delay registers
++ */
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ current_total_delay[lane] = (rank0_current_total_delay[lane] + current_total_delay[lane]) / 2;
++ if (lane == 8)
++ pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane];
++ else
++ pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane];
++ }
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
++ }
++ }
++
++#if DQS_TRAIN_DEBUG > 0
++ for (lane = 0; lane < 8; lane++)
++ print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
++#endif
++ }
++ }
++
++ /* Calculate and program MaxRdLatency */
++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel);
+
+ if(_DisableDramECC) {
+ mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
+@@ -674,10 +1486,10 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
+ }
+
+ for (ch=0; ch<ch_end; ch++) {
+- reg = 0x78 + 0x100 * ch;
+- val = Get_NB32(dev, reg);
++ reg = 0x78;
++ val = Get_NB32_DCT(dev, ch, reg);
+ val &= ~(1 << DqsRcvEnTrain);
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, ch, reg, val);
+ }
+ }
+
+@@ -718,7 +1530,7 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly,
+ /* get the register index from table */
+ index = Table_DQSRcvEn_Offset[i >> 1];
+ index += Addl_Index; /* DIMMx DqsRcvEn byte0 */
+- val = Get_NB32_index_wait(dev, index_reg, index);
++ val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, index);
+ if(i & 1) {
+ /* odd byte lane */
+ val &= ~(0x1ff << 16);
+@@ -728,7 +1540,7 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly,
+ val &= ~0x1ff;
+ val |= (RcvrEnDly & 0x1ff);
+ }
+- Set_NB32_index_wait(dev, index_reg, index, val);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index, val);
+ }
+
+ }
+@@ -742,7 +1554,6 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D
+ u32 reg;
+ u32 SubTotal;
+ u32 index_reg;
+- u32 reg_off;
+ u32 val;
+
+ uint8_t cpu_val_n;
+@@ -777,17 +1588,16 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D
+ Channel = 0;
+
+ dev = pDCTstat->dev_dct;
+- reg_off = 0x100 * Channel;
+- index_reg = 0x98 + reg_off;
++ index_reg = 0x98;
+
+ /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
+- val = Get_NB32(dev, 0x88 + reg_off);
++ val = Get_NB32_DCT(dev, Channel, 0x88);
+ SubTotal = ((val & 0x0f) + 4) << 1; /* SubTotal is 1/2 Memclk unit */
+
+ /* If registered DIMMs are being used then
+ * add 1 MEMCLK to the sub-total.
+ */
+- val = Get_NB32(dev, 0x90 + reg_off);
++ val = Get_NB32_DCT(dev, Channel, 0x90);
+ if(!(val & (1 << UnBuffDimm)))
+ SubTotal += 2;
+
+@@ -795,7 +1605,7 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D
+ * add 1, else add 2 to the sub-total.
+ * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
+ */
+- val = Get_NB32_index_wait(dev, index_reg, 0x04);
++ val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x04);
+ if(!(val & 0x00202020))
+ SubTotal += 1;
+ else
+@@ -803,7 +1613,7 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D
+
+ /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
+ * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
+- val = Get_NB32(dev, 0x78 + reg_off);
++ val = Get_NB32_DCT(dev, Channel, 0x78);
+ SubTotal += 8 - (val & 0x0f);
+
+ /* Convert bits 7-5 (also referred to as the coarse delay) of
+@@ -824,7 +1634,7 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D
+ * clocks (NCLKs)
+ */
+ SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4);
+- SubTotal /= freq_tab[((Get_NB32(pDCTstat->dev_dct, 0x94 + reg_off) & 0x7) - 3)];
++ SubTotal /= freq_tab[((Get_NB32_DCT(pDCTstat->dev_dct, Channel, 0x94) & 0x7) - 3)];
+ SubTotal = (SubTotal + (2 - 1)) / 2; /* Round up */
+
+ /* Add "N" NCLKs to the sub-total. "N" represents part of the
+@@ -841,13 +1651,13 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D
+ /* Program the F2x[1, 0]78[MaxRdLatency] register with
+ * the total delay value (in NCLKs).
+ */
+- reg = 0x78 + reg_off;
+- val = Get_NB32(dev, reg);
++ reg = 0x78;
++ val = Get_NB32_DCT(dev, Channel, reg);
+ val &= ~(0x3ff << 22);
+ val |= (SubTotal & 0x3ff) << 22;
+
+ /* program MaxRdLatency to correspond with current delay */
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, Channel, reg, val);
+ }
+
+ static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+@@ -877,7 +1687,7 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ u32 dword;
+ u8 dn = 4; /* TODO: Rev C could be 4 */
+ u32 dev = pDCTstat->dev_dct;
+- u32 index_reg = 0x98 + 0x100 * Channel;
++ u32 index_reg = 0x98;
+
+ /* FIXME: add Cx support */
+ dword = 0x00000000;
+@@ -885,7 +1695,7 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ for(j=0; j<dn; j++)
+ /* DIMM0 Write Data Timing Low */
+ /* DIMM0 Write ECC Timing */
+- Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, i + 0x100 * j, dword);
+ }
+
+ /* errata #180 */
+@@ -893,13 +1703,13 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ for(i=5; i<=6; i++) {
+ for(j=0; j<dn; j++)
+ /* DIMM0 Read DQS Timing Control Low */
+- Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, i + 0x100 * j, dword);
+ }
+
+ dword = 0x0000002f;
+ for(j=0; j<dn; j++)
+ /* DIMM0 Read DQS ECC Timing Control */
+- Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 7 + 0x100 * j, dword);
+ }
+
+ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
+@@ -912,13 +1722,13 @@ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
+ u32 val;
+
+ dev = pDCTstat->dev_dct;
+- index_reg = 0x98 + Channel * 0x100;
++ index_reg = 0x98;
+ index = 0x12;
+ p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
+ print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);
+ for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
+ val = p[ChipSel>>1];
+- Set_NB32_index_wait(dev, index_reg, index, val);
++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index, val);
+ print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
+ ChipSel, " rcvr_delay ", val, 2);
+ index += 3;
+@@ -1002,95 +1812,305 @@ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
+ u8 Node = 0;
+ struct DCTStatStruc *pDCTstat;
+
++ printk(BIOS_DEBUG, "%s: Start\n", __func__);
++
+ /* FIXME: skip for Ax */
+- while (Node < MAX_NODES_SUPPORTED) {
++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+ pDCTstat = pDCTstatA + Node;
++ if (!pDCTstat->NodePresent)
++ continue;
++
++ if (pDCTstat->DCTSysLimit) {
++ if (is_fam15h()) {
++ /* Fam15h BKDG v3.14 section 2.10.5.3.3
++ * This picks up where InitDDRPhy left off
++ */
++ uint8_t dct;
++ uint8_t index;
++ uint32_t dword;
++ uint32_t datc_backup;
++ uint32_t training_dword;
++ uint32_t fence2_config_dword;
++ uint32_t fence_tx_pad_config_dword;
++ uint32_t index_reg = 0x98;
++ uint32_t dev = pDCTstat->dev_dct;
++
++ for (dct = 0; dct < 2; dct++) {
++ if (!pDCTstat->DIMMValidDCT[dct])
++ continue;
++
++ /* Back up D18F2x9C_x0000_0004_dct[1:0] */
++ datc_backup = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004);
++
++ /* FenceTrSel = 0x2 */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008);
++ dword &= ~(0x3 << 6);
++ dword |= (0x2 << 6);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword);
++
++ /* Set phase recovery seed values */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013);
++
++ training_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct);
++
++ /* Save calculated fence value to the TX DLL */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
++ dword &= ~(0x1f << 26);
++ dword |= ((training_dword & 0x1f) << 26);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword);
++
++ /* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x1 */
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8));
++ dword &= ~(0x7 << 12);
++ dword |= (0x1 << 12);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8), dword);
++ }
++
++ /* FenceTrSel = 0x1 */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008);
++ dword &= ~(0x3 << 6);
++ dword |= (0x1 << 6);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword);
++
++ /* Set phase recovery seed values */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013);
++
++ training_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct);
++
++ /* Save calculated fence value to the RX DLL */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
++ dword &= ~(0x1f << 21);
++ dword |= ((training_dword & 0x1f) << 21);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword);
++
++ /* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x0 */
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8));
++ dword &= ~(0x7 << 12);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8), dword);
++ }
++
++ /* FenceTrSel = 0x3 */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008);
++ dword &= ~(0x3 << 6);
++ dword |= (0x3 << 6);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword);
++
++ /* Set phase recovery seed values */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013);
++
++ fence_tx_pad_config_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct);
++
++ /* Save calculated fence value to the TX Pad */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
++ dword &= ~(0x1f << 16);
++ dword |= ((fence_tx_pad_config_dword & 0x1f) << 16);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword);
++
++ /* Program D18F2x9C_x0D0F_[C,8,2][2:0]31_dct[1:0] */
++ training_dword = fence_tx_pad_config_dword;
++ if (fence_tx_pad_config_dword < 16)
++ training_dword |= (0x1 << 4);
++ else
++ training_dword = 0;
++ for (index = 0; index < 0x3; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2031 | (index << 8));
++ dword &= ~(0x1f);
++ dword |= (training_dword & 0x1f);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2031 | (index << 8), dword);
++ }
++ for (index = 0; index < 0x3; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8031 | (index << 8));
++ dword &= ~(0x1f);
++ dword |= (training_dword & 0x1f);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8031 | (index << 8), dword);
++ }
++ for (index = 0; index < 0x3; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc031 | (index << 8));
++ dword &= ~(0x1f);
++ dword |= (training_dword & 0x1f);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc031 | (index << 8), dword);
++ }
++
++ /* Assemble Fence2 configuration word (Fam15h BKDG v3.14 page 331) */
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c);
++ fence2_config_dword = 0;
++
++ /* TxPad */
++ training_dword = (dword >> 16) & 0x1f;
++ if (training_dword < 16)
++ training_dword |= 0x10;
++ else
++ training_dword = 0;
++ fence2_config_dword |= training_dword;
++
++ /* RxDll */
++ training_dword = (dword >> 21) & 0x1f;
++ if (training_dword < 16)
++ training_dword |= 0x10;
++ else
++ training_dword = 0;
++ fence2_config_dword |= (training_dword << 10);
++
++ /* TxDll */
++ training_dword = (dword >> 26) & 0x1f;
++ if (training_dword < 16)
++ training_dword |= 0x10;
++ else
++ training_dword = 0;
++ fence2_config_dword |= (training_dword << 5);
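++ /* Illustrative example (field values assumed): with TxPad = 0x0a,
++  * RxDll = 0x07 and TxDll = 0x12 read back from the fence register,
++  * the two sub-16 values gain bit 4 (0x1a and 0x17) while TxDll clamps
++  * to 0, giving fence2_config_dword = 0x1a | (0 << 5) | (0x17 << 10) = 0x5c1a.
++  */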
++
++ /* Program D18F2x9C_x0D0F_0[F,8:0]31_dct[1:0] */
++ for (index = 0; index < 0x9; index++) {
++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (index << 8));
++ dword &= ~(0x7fff);
++ dword |= fence2_config_dword;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (index << 8), dword);
++ }
+
+- if(pDCTstat->DCTSysLimit) {
+- fenceDynTraining_D(pMCTstat, pDCTstat, 0);
+- fenceDynTraining_D(pMCTstat, pDCTstat, 1);
++ /* Restore D18F2x9C_x0000_0004_dct[1:0] */
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004, datc_backup);
++ }
++ } else {
++ fenceDynTraining_D(pMCTstat, pDCTstat, 0);
++ fenceDynTraining_D(pMCTstat, pDCTstat, 1);
++ }
+ }
+- Node++;
+ }
++
++ printk(BIOS_DEBUG, "%s: Done\n", __func__);
+ }
+
+-static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
++static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+ u16 avRecValue;
+ u32 val;
+ u32 dev;
+- u32 index_reg = 0x98 + 0x100 * dct;
++ u32 index_reg = 0x98;
+ u32 index;
+
+- /* BIOS first programs a seed value to the phase recovery engine
+- * (recommended 19) registers.
+- * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
+- * F2x[1,0]9C_x52.) .
+- */
+ dev = pDCTstat->dev_dct;
+- for (index = 0x50; index <= 0x52; index ++) {
+- val = (FenceTrnFinDlySeed & 0x1F);
+- if (index != 0x52) {
+- val |= val << 8 | val << 16 | val << 24;
++
++ if (is_fam15h()) {
++ /* Set F2x[1,0]9C_x08[PhyFenceTrEn] */
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
++ val |= 1 << PhyFenceTrEn;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val);
++
++ /* Wait 2000 MEMCLKs */
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 2000);
++
++ /* Clear F2x[1,0]9C_x08[PhyFenceTrEn] */
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
++ val &= ~(1 << PhyFenceTrEn);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val);
++
++ /* BIOS reads the phase recovery engine registers
++ * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52.
++ * Average the fine delay components only.
++ */
++ avRecValue = 0;
++ for (index = 0x50; index <= 0x52; index++) {
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
++ avRecValue += val & 0x1f;
++ if (index != 0x52) {
++ avRecValue += (val >> 8) & 0x1f;
++ avRecValue += (val >> 16) & 0x1f;
++ avRecValue += (val >> 24) & 0x1f;
++ }
+ }
+- Set_NB32_index_wait(dev, index_reg, index, val);
+- }
+
+- /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
+- val = Get_NB32_index_wait(dev, index_reg, 0x08);
+- val |= 1 << PhyFenceTrEn;
+- Set_NB32_index_wait(dev, index_reg, 0x08, val);
+-
+- /* Wait 200 MEMCLKs. */
+- mct_Wait(50000); /* wait 200us */
+-
+- /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
+- val = Get_NB32_index_wait(dev, index_reg, 0x08);
+- val &= ~(1 << PhyFenceTrEn);
+- Set_NB32_index_wait(dev, index_reg, 0x08, val);
+-
+- /* BIOS reads the phase recovery engine registers
+- * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
+- avRecValue = 0;
+- for (index = 0x50; index <= 0x52; index ++) {
+- val = Get_NB32_index_wait(dev, index_reg, index);
+- avRecValue += val & 0x7F;
+- if (index != 0x52) {
+- avRecValue += (val >> 8) & 0x7F;
+- avRecValue += (val >> 16) & 0x7F;
+- avRecValue += (val >> 24) & 0x7F;
++ val = avRecValue / 9;
++ if (avRecValue % 9)
++ val++;
++ avRecValue = val;
++
++ if (avRecValue < 6)
++ avRecValue = 0;
++ else
++ avRecValue -= 6;
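++ /* Illustrative arithmetic (register contents assumed): if the nine 5-bit
++  * fine-delay fields sum to 92, the rounded-up average is 11, and subtracting
++  * 6 yields a fence value of 5 for the caller to program into the TX/RX DLL
++  * or TX pad field of F2x[1,0]9C_x0C.
++  */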
++
++ return avRecValue;
++ } else {
++ /* BIOS first programs a seed value to the phase recovery engine
++ * (recommended 19) registers.
++ * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
++ * F2x[1,0]9C_x52).
++ */
++ for (index = 0x50; index <= 0x52; index ++) {
++ val = (FenceTrnFinDlySeed & 0x1F);
++ if (index != 0x52) {
++ val |= val << 8 | val << 16 | val << 24;
++ }
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val);
+ }
+- }
+
+- val = avRecValue / 9;
+- if (avRecValue % 9)
+- val++;
+- avRecValue = val;
++ /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
++ val |= 1 << PhyFenceTrEn;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val);
++
++ /* Wait 200 MEMCLKs. */
++ mct_Wait(50000); /* wait 200us */
++
++ /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);
++ val &= ~(1 << PhyFenceTrEn);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val);
++
++ /* BIOS reads the phase recovery engine registers
++ * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
++ avRecValue = 0;
++ for (index = 0x50; index <= 0x52; index ++) {
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index);
++ avRecValue += val & 0x7F;
++ if (index != 0x52) {
++ avRecValue += (val >> 8) & 0x7F;
++ avRecValue += (val >> 16) & 0x7F;
++ avRecValue += (val >> 24) & 0x7F;
++ }
++ }
+
+- /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
+- /* inlined mct_AdjustFenceValue() */
+- /* TODO: The RBC0 is not supported. */
+- /* if (pDCTstat->LogicalCPUID & AMD_RB_C0)
+- avRecValue -= 3;
+- else
+- */
+- if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
+- avRecValue -= 8;
+- else if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
+- avRecValue -= 8;
+- else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
+- avRecValue -= 8;
+-
+- val = Get_NB32_index_wait(dev, index_reg, 0x0C);
+- val &= ~(0x1F << 16);
+- val |= (avRecValue & 0x1F) << 16;
+- Set_NB32_index_wait(dev, index_reg, 0x0C, val);
+-
+- /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
+- * delays (both channels). */
+- val = Get_NB32_index_wait(dev, index_reg, 0x04);
+- Set_NB32_index_wait(dev, index_reg, 0x04, val);
++ val = avRecValue / 9;
++ if (avRecValue % 9)
++ val++;
++ avRecValue = val;
++
++ /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
++ /* inlined mct_AdjustFenceValue() */
++ /* TODO: The RBC0 is not supported. */
++ /* if (pDCTstat->LogicalCPUID & AMD_RB_C0)
++ avRecValue -= 3;
++ else
++ */
++ if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
++ avRecValue -= 8;
++ else if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
++ avRecValue -= 8;
++ else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
++ avRecValue -= 8;
++
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C);
++ val &= ~(0x1F << 16);
++ val |= (avRecValue & 0x1F) << 16;
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C, val);
++
++ /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
++ * delays (both channels).
++ */
++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x04);
++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x04, val);
++
++ return avRecValue;
++ }
+ }
+
+ void mct_Wait(u32 cycles)
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
+index f01e011..55068ce 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
+@@ -21,8 +21,14 @@
+ u8 mct_checkNumberOfDqsRcvEn_1Pass(u8 pass)
+ {
+ u8 ret = 1;
+- if (pass == SecondPass)
+- ret = 0;
++
++ if (is_fam15h()) {
++ /* Fam15h needs two passes */
++ ret = 1;
++ } else {
++ if (pass == SecondPass)
++ ret = 0;
++ }
+
+ return ret;
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
+index 920f514..68acc75 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
+@@ -218,12 +218,12 @@ static void mct_setMaxRdLatTrnVal_D(struct DCTStatStruc *pDCTstat,
+ }
+
+ dev = pDCTstat->dev_dct;
+- reg = 0x78 + Channel * 0x100;
+- val = Get_NB32(dev, reg);
++ reg = 0x78;
++ val = Get_NB32_DCT(dev, Channel, reg);
+ val &= ~(0x3ff<<22);
+ val |= MaxRdLatVal<<22;
+ /* program MaxRdLatency to correspond with current delay */
+- Set_NB32(dev, reg, val);
++ Set_NB32_DCT(dev, Channel, reg, val);
+ }
+
+ static u8 CompareMaxRdLatTestPattern_D(u32 pattern_buf, u32 addr)
+@@ -320,30 +320,28 @@ u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat,
+ u32 valx;
+ u32 valxx;
+ u32 index_reg;
+- u32 reg_off;
+ u32 dev;
+
+ if(pDCTstat->GangedMode)
+ Channel = 0;
+
+- index_reg = 0x98 + 0x100 * Channel;
++ index_reg = 0x98;
+
+- reg_off = 0x100 * Channel;
+ dev = pDCTstat->dev_dct;
+
+ /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
+- val = Get_NB32(dev, 0x88 + reg_off);
++ val = Get_NB32_DCT(dev, Channel, 0x88);
+ SubTotal = ((val & 0x0f) + 1) << 1; /* SubTotal is 1/2 Memclk unit */
+
+ /* If registered DIMMs are being used then add 1 MEMCLK to the sub-total*/
+- val = Get_NB32(dev, 0x90 + reg_off);
++ val = Get_NB32_DCT(dev, Channel, 0x90);
+ if(!(val & (1 << UnBuffDimm)))
+ SubTotal += 2;
+
+ /*If the address prelaunch is setup for 1/2 MEMCLKs then add 1,
+ * else add 2 to the sub-total. if (AddrCmdSetup || CsOdtSetup
+ * || CkeSetup) then K := K + 2; */
+- val = Get_NB32_index_wait(dev, index_reg, 0x04);
++ val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x04);
+ if(!(val & 0x00202020))
+ SubTotal += 1;
+ else
+@@ -351,7 +349,7 @@ u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat,
+
+ /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
+ * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
+- val = Get_NB32(dev, 0x78 + reg_off);
++ val = Get_NB32_DCT(dev, Channel, 0x78);
+ SubTotal += 8 - (val & 0x0f);
+
+ /* Convert bits 7-5 (also referred to as the course delay) of the current
+@@ -367,7 +365,7 @@ u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat,
+
+ /*New formula:
+ SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
+- val = Get_NB32(dev, 0x94 + reg_off);
++ val = Get_NB32_DCT(dev, Channel, 0x94);
+ /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
+ val &= 7;
+ if (val >= 3) {
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
+index 1c3e322..0ff4484 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
+@@ -83,6 +83,12 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat,
+ pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_REGISTERED] = 0;
+ }
+
++ if (pDCTstat->Status & (1 << SB_LoadReduced)) {
++ pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_LOAD_REDUCED] = 1;
++ } else {
++ pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_LOAD_REDUCED] = 0;
++ }
++
+ pDCTstat->C_DCTPtr[dct]->RegMan1Present = pDCTstat->RegMan1Present;
+
+ for (dimm = 0; dimm < MAX_TOTAL_DIMMS; dimm++) {
+@@ -103,13 +109,13 @@ void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDC
+ {
+ u32 val;
+
+- val = Get_NB32(pDCTstat->dev_dct, 0x94);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94);
+ val |= 1 << 11;
+- Set_NB32(pDCTstat->dev_dct, 0x94, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, val);
+
+- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94);
+ val |= 1 << 11;
+- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val);
+ }
+
+ void DisableZQcalibration(struct MCTStatStruc *pMCTstat,
+@@ -117,15 +123,15 @@ void DisableZQcalibration(struct MCTStatStruc *pMCTstat,
+ {
+ u32 val;
+
+- val = Get_NB32(pDCTstat->dev_dct, 0x94);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94);
+ val &= ~(1 << 11);
+ val &= ~(1 << 10);
+- Set_NB32(pDCTstat->dev_dct, 0x94, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, val);
+
+- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94);
+ val &= ~(1 << 11);
+ val &= ~(1 << 10);
+- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val);
+ }
+
+ static void EnterSelfRefresh(struct MCTStatStruc *pMCTstat,
+@@ -142,23 +148,23 @@ static void EnterSelfRefresh(struct MCTStatStruc *pMCTstat,
+
+ /* Program F2x[1, 0]90[EnterSelfRefresh]=1. */
+ if (DCT0Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x90);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90);
+ val |= 1 << EnterSelfRef;
+- Set_NB32(pDCTstat->dev_dct, 0x90, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x90, val);
+ }
+ if (DCT1Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x90 + 0x100);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90);
+ val |= 1 << EnterSelfRef;
+- Set_NB32(pDCTstat->dev_dct, 0x90 + 0x100, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, val);
+ }
+ /* Wait until the hardware resets F2x[1, 0]90[EnterSelfRefresh]=0. */
+ if (DCT0Present)
+ do {
+- val = Get_NB32(pDCTstat->dev_dct, 0x90);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90);
+ } while (val & (1 <<EnterSelfRef));
+ if (DCT1Present)
+ do {
+- val = Get_NB32(pDCTstat->dev_dct, 0x90 + 0x100);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90);
+ } while (val & (1 <<EnterSelfRef));
+ }
+
+@@ -168,8 +174,11 @@ static void EnterSelfRefresh(struct MCTStatStruc *pMCTstat,
+ static void ChangeMemClk(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat)
+ {
+- u8 DCT0Present, DCT1Present;
+- u32 val;
++ uint8_t DCT0Present;
++ uint8_t DCT1Present;
++ uint32_t dword;
++ uint32_t mask;
++ uint32_t offset;
+
+ DCT0Present = pDCTstat->DIMMValidDCT[0];
+ if (pDCTstat->GangedMode)
+@@ -177,76 +186,134 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat,
+ else
+ DCT1Present = pDCTstat->DIMMValidDCT[1];
+
+- /* Program F2x[1, 0]90[EnterSelfRefresh]=1. */
+- if (DCT0Present) {
+- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98, 8);
+- val |= 1 << DisAutoComp;
+- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98, 8, val);
+- }
+- if (DCT1Present) {
+- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98 + 0x100, 8);
+- val |= 1 << DisAutoComp;
+- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + 0x100, 8, val);
++ if (is_fam15h()) {
++ /* Program D18F2x9C_x0D0F_E006_dct[1:0][PllLockTime] = 0x190 */
++ if (DCT0Present) {
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 0x0d0fe006);
++ dword &= ~(0x0000ffff);
++ dword |= 0x00000190;
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 0x0d0fe006, dword);
++ }
++ if (DCT1Present) {
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 0x0d0fe006);
++ dword &= ~(0x0000ffff);
++ dword |= 0x00000190;
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 0x0d0fe006, dword);
++ }
++ } else {
++ /* Program F2x[1, 0]9C[DisAutoComp]=1. */
++ if (DCT0Present) {
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 8);
++ dword |= 1 << DisAutoComp;
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 8, dword);
++ mct_Wait(100); /* Wait for 5us */
++ }
++ if (DCT1Present) {
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 8);
++ dword |= 1 << DisAutoComp;
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 8, dword);
++ mct_Wait(100); /* Wait for 5us */
++ }
+ }
+
+ /* Program F2x[1, 0]94[MemClkFreqVal] = 0. */
+ if (DCT0Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94);
+- val &= ~(1 << MemClkFreqVal);
+- Set_NB32(pDCTstat->dev_dct, 0x94, val);
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94);
++ dword &= ~(1 << MemClkFreqVal);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, dword);
+ }
+ if (DCT1Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100);
+- val &= ~(1 << MemClkFreqVal);
+- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val);
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94);
++ dword &= ~(1 << MemClkFreqVal);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, dword);
+ }
+
+ /* Program F2x[1, 0]94[MemClkFreq] to specify the target MEMCLK frequency. */
++ if (is_fam15h()) {
++ offset = 0x0;
++ mask = 0x1f;
++ } else {
++ offset = 0x1;
++ mask = 0x7;
++ }
+ if (DCT0Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94);
+- val &= 0xFFFFFFF8;
+- val |= pDCTstat->TargetFreq - 1;
+- Set_NB32(pDCTstat->dev_dct, 0x94, val);
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94);
++ dword &= ~mask;
++ dword |= (pDCTstat->TargetFreq - offset) & mask;
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, dword);
+ }
+ if (DCT1Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100);
+- val &= 0xFFFFFFF8;
+- val |= pDCTstat->TargetFreq - 1;
+- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val);
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94);
++ dword &= ~mask;
++ dword |= (pDCTstat->TargetFreq - offset) & mask;
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, dword);
++ }
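++ /* Note: the offset/mask pair above implies TargetFreq already carries the
++  * family-specific MemClkFreq encoding; e.g. per the fam15h_freq_tab added
++  * elsewhere in this patch, a Fam15h field value of 22 (0x16) selects a
++  * 933 MHz MEMCLK, while on Fam10h the 3-bit field holds TargetFreq - 1.
++  */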
++
++ if (is_fam15h()) {
++ if (DCT0Present) {
++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, 0);
++ set_2t_configuration(pMCTstat, pDCTstat, 0);
++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, 0);
++ mct_PlatformSpec(pMCTstat, pDCTstat, 0);
++ }
++ if (DCT1Present) {
++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, 1);
++ set_2t_configuration(pMCTstat, pDCTstat, 1);
++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, 1);
++ mct_PlatformSpec(pMCTstat, pDCTstat, 1);
++ }
+ }
+
+ /* Program F2x[1, 0]94[MemClkFreqVal] = 1. */
+ if (DCT0Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94);
+- val |= 1 << MemClkFreqVal;
+- Set_NB32(pDCTstat->dev_dct, 0x94, val);
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94);
++ dword |= 1 << MemClkFreqVal;
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, dword);
+ }
+ if (DCT1Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100);
+- val |= 1 << MemClkFreqVal;
+- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val);
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94);
++ dword |= 1 << MemClkFreqVal;
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, dword);
+ }
+
+ /* Wait until F2x[1, 0]94[FreqChgInProg]=0. */
+ if (DCT0Present)
+ do {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94);
+- } while (val & (1 << FreqChgInProg));
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94);
++ } while (dword & (1 << FreqChgInProg));
+ if (DCT1Present)
+ do {
+- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100);
+- } while (val & (1 << FreqChgInProg));
+-
+- /* Program F2x[1, 0]94[MemClkFreqVal] = 0. */
+- if (DCT0Present) {
+- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98, 8);
+- val &= ~(1 << DisAutoComp);
+- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98, 8, val);
+- }
+- if (DCT1Present) {
+- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98 + 0x100, 8);
+- val &= ~(1 << DisAutoComp);
+- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + 0x100, 8, val);
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94);
++ } while (dword & (1 << FreqChgInProg));
++
++ if (is_fam15h()) {
++ /* Program D18F2x9C_x0D0F_E006_dct[1:0][PllLockTime] = 0xf */
++ if (DCT0Present) {
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 0x0d0fe006);
++ dword &= ~(0x0000ffff);
++ dword |= 0x0000000f;
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 0x0d0fe006, dword);
++ }
++ if (DCT1Present) {
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 0x0d0fe006);
++ dword &= ~(0x0000ffff);
++ dword |= 0x0000000f;
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 0x0d0fe006, dword);
++ }
++ } else {
++ /* Program F2x[1, 0]9C[DisAutoComp] = 0. */
++ if (DCT0Present) {
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 8);
++ dword &= ~(1 << DisAutoComp);
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 8, dword);
++ mct_Wait(15000); /* Wait for 750us */
++ }
++ if (DCT1Present) {
++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 8);
++ dword &= ~(1 << DisAutoComp);
++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 8, dword);
++ mct_Wait(15000); /* Wait for 750us */
++ }
+ }
+ }
+
+@@ -267,29 +334,46 @@ static void ExitSelfRefresh(struct MCTStatStruc *pMCTstat,
+
+ /* Program F2x[1, 0]90[ExitSelfRef]=1 for both DCTs. */
+ if (DCT0Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x90);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90);
+ val |= 1 << ExitSelfRef;
+- Set_NB32(pDCTstat->dev_dct, 0x90, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x90, val);
+ }
+ if (DCT1Present) {
+- val = Get_NB32(pDCTstat->dev_dct, 0x90 + 0x100);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90);
+ val |= 1 << ExitSelfRef;
+- Set_NB32(pDCTstat->dev_dct, 0x90 + 0x100, val);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, val);
+ }
+ /* Wait until the hardware resets F2x[1, 0]90[ExitSelfRef]=0. */
+ if (DCT0Present)
+ do {
+- val = Get_NB32(pDCTstat->dev_dct, 0x90);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90);
+ } while (val & (1 << ExitSelfRef));
+ if (DCT1Present)
+ do {
+- val = Get_NB32(pDCTstat->dev_dct, 0x90 + 0x100);
++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90);
+ } while (val & (1 << ExitSelfRef));
+ }
+
+ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat)
+ {
++ uint32_t dword;
++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
++
++ if (is_fam15h()) {
++ /* Program F2x[1, 0]90[DisDllShutDownSR]=1. */
++ if (pDCTstat->DIMMValidDCT[0]) {
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90);
++ dword |= (0x1 << 27);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x90, dword);
++ }
++ if (pDCTstat->DIMMValidDCT[1]) {
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90);
++ dword |= (0x1 << 27);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, dword);
++ }
++ }
++
+ /* Program F2x[1,0]90[EnterSelfRefresh]=1.
+ * Wait until the hardware resets F2x[1,0]90[EnterSelfRefresh]=0.
+ */
+@@ -305,11 +389,38 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
+ */
+ ChangeMemClk(pMCTstat, pDCTstat);
+
++ if (is_fam15h()) {
++ uint8_t dct;
++ for (dct = 0; dct < 2; dct++) {
++ if (pDCTstat->DIMMValidDCT[dct]) {
++ phyAssistedMemFnceTraining(pMCTstat, pDCTstat);
++ InitPhyCompensation(pMCTstat, pDCTstat, dct);
++ }
++ }
++ }
++
+ /* Program F2x[1,0]90[ExitSelfRef]=1 for both DCTs.
+ * Wait until the hardware resets F2x[1, 0]90[ExitSelfRef]=0.
+ */
+ ExitSelfRefresh(pMCTstat, pDCTstat);
+
++ if (is_fam15h()) {
++ if ((package_type == PT_C3) || (package_type == PT_GR)) {
++ /* Socket C32 or G34 */
++ /* Program F2x[1, 0]90[DisDllShutDownSR]=0. */
++ if (pDCTstat->DIMMValidDCT[0]) {
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90);
++ dword &= ~(0x1 << 27);
++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x90, dword);
++ }
++ if (pDCTstat->DIMMValidDCT[1]) {
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90);
++ dword &= ~(0x1 << 27);
++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, dword);
++ }
++ }
++ }
++
+ /* wait for 500 MCLKs after ExitSelfRef, 500*2.5ns=1250ns */
+ mct_Wait(250);
+
+@@ -336,13 +447,13 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
+ static void Modify_OnDimmMirror(struct DCTStatStruc *pDCTstat, u8 dct, u8 set)
+ {
+ u32 val;
+- u32 reg_off = dct * 0x100 + 0x44;
+- while (reg_off < (dct * 0x100 + 0x60)) {
+- val = Get_NB32(pDCTstat->dev_dct, reg_off);
++ u32 reg = 0x44;
++ while (reg < 0x60) {
++ val = Get_NB32_DCT(pDCTstat->dev_dct, dct, reg);
+ if (val & (1 << CSEnable))
+ set ? (val |= 1 << onDimmMirror) : (val &= ~(1<<onDimmMirror));
+- Set_NB32(pDCTstat->dev_dct, reg_off, val);
+- reg_off += 8;
++ Set_NB32_DCT(pDCTstat->dev_dct, dct, reg, val);
++ reg += 8;
+ }
+ }
+
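
The accessor changes above (and throughout this patch) replace the Fam10h convention of addressing the second DRAM controller at register + 0x100 with per-DCT helpers; on Fam15h the controller is instead selected through F1x10C before the access, as the get_Bits() hunk in mutilc_d.c near the end of this patch shows. A rough standalone sketch of the two conventions, with helper names of our own:

#include <stdint.h>

/* Fam10h: the second DCT's copy of a DRAM controller register sits
 * at the base offset plus 0x100. */
static uint16_t fam10h_dct_reg(uint8_t dct, uint16_t reg)
{
	return reg + (dct ? 0x100 : 0x000);
}

/* Fam15h: the target DCT is chosen by rewriting F1x10C (DctCfgSel in
 * bit 0) before the access; only the bit manipulation is shown here. */
static uint32_t fam15h_select_dct(uint32_t f1x10c, uint8_t dct)
{
	return (f1x10c & ~0x1u) | (dct & 0x1u);
}
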
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+index 397fd77..35378c8 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+@@ -30,13 +30,22 @@
+ *
+ *----------------------------------------------------------------------------
+ */
+-u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue);
+-u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue);
+-void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl);
+-void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm);
+-void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass);
+-void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr);
+-void getWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm);
++u32 swapAddrBits_wl(struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t MRSValue);
++u32 swapBankBits(struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t MRSValue);
++void prepareDimms(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
++ u8 dct, u8 dimm, BOOL wl);
++void programODT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm);
++void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm, u8 pass);
++void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, u8 targetAddr, uint8_t pass);
++void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass);
++
++static int32_t abs(int32_t val) {
++ if (val < 0)
++ val *= -1;
++
++ return val;
++}
++
+ /*
+ *-----------------------------------------------------------------------------
+ * EXPORTED FUNCTIONS
+@@ -62,34 +71,55 @@ void getWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm);
+ * OUT
+ *-----------------------------------------------------------------------------
+ */
+-void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData,
+- u8 dimm, u8 pass)
++void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
++ u8 dct, u8 dimm, u8 pass)
+ {
+ u8 ByteLane;
+ u32 Value, Addr;
+ u16 Addl_Data_Offset, Addl_Data_Port;
++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr;
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
+
+ pDCTData->WLPass = pass;
+ /* 1. Specify the target DIMM that is to be trained by programming
+ * F2x[1, 0]9C_x08[TrDimmSel].
+ */
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_ADD_DCT_PHY_CONTROL_REG, TrDimmSelStart,
+- TrDimmSelEnd,(u32)dimm);
++ TrDimmSelEnd, (u32)dimm);
++
++ if (is_fam15h()) {
++ /* Set TrNibbleSel = 0
++ *
++ * TODO: Add support for x4 DIMMs
++ */
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_ADD_DCT_PHY_CONTROL_REG, 2,
++ 2, (u32)0);
++ }
++
+ /* 2. Prepare the DIMMs for write levelization using DDR3-defined
+ * MR commands. */
+- prepareDimms(pMCTData, pDCTData,dimm, TRUE);
++ prepareDimms(pMCTstat, pDCTstat, dct, dimm, TRUE);
++
+ /* 3. After the DIMMs are configured, BIOS waits 40 MEMCLKs to
+ * satisfy DDR3-defined internal DRAM timing.
+ */
+- pMCTData->AgesaDelay(40);
++ if (is_fam15h())
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 40);
++ else
++ pMCTData->AgesaDelay(40);
++
+ /* 4. Configure the processor's DDR phy for write levelization training: */
+- procConifg(pMCTData,pDCTData, dimm, pass);
++ procConfig(pMCTstat, pDCTstat, dct, dimm, pass);
++
+ /* 5. Begin write levelization training:
+- * Program F2x[1, 0]9C_x08[WrtLevelTrEn]=1. */
+- if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx))
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ * Program F2x[1, 0]9C_x08[WrtLvTrEn]=1. */
++ if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx | AMD_FAM15_ALL))
++ {
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 1);
++ }
+ else
+ {
+ /* Broadcast write to all D3Dbyte chipset register offset 0xc
+@@ -98,7 +128,7 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData,
+ * retain value of 3:2 (Trdimmsel)
+ * reset bit 5 (FrzPR)
+ */
+- if (pDCTData->DctTrain)
++ if (dct)
+ {
+ Addl_Data_Offset=0x198;
+ Addl_Data_Port=0x19C;
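
The Fam15h path above waits an exact number of MEMCLK periods via precise_memclk_delay_fam15() instead of a fixed microsecond count. The sketch below only illustrates the underlying arithmetic (one MEMCLK is 1000/f ns at f MHz) and is not that function's implementation:

#include <stdint.h>

/*
 * Convert a MEMCLK count into nanoseconds, rounding up; for example
 * the 40 MEMCLK wait above is 50 ns at an 800 MHz MEMCLK (DDR3-1600).
 */
static uint32_t memclks_to_ns(uint32_t memclks, uint16_t memclk_mhz)
{
	return (memclks * 1000 + memclk_mhz - 1) / memclk_mhz;
}
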
+@@ -123,29 +153,127 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData,
+ DctAccessDone, DctAccessDone)) == 0);
+ }
+
++ if (is_fam15h())
++ proc_MFENCE();
++
+ /* Wait 200 MEMCLKs. If executing pass 2, wait 32 MEMCLKs. */
+- pMCTData->AgesaDelay(140);
++ if (is_fam15h())
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 200);
++ else
++ pMCTData->AgesaDelay(140);
++
+ /* Program F2x[1, 0]9C_x08[WrtLevelTrEn]=0. */
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 0);
++
+ /* Read from registers F2x[1, 0]9C_x[51:50] and F2x[1, 0]9C_x52
+ * to get the gross and fine delay settings
+ * for the target DIMM and save these values. */
+- ByteLane = 0;
+- while (ByteLane < MAX_BYTE_LANES)
+- {
+- getWLByteDelay(pDCTData,ByteLane, dimm);
+- setWLByteDelay(pDCTData,ByteLane, dimm, 1);
+- ByteLane++;
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ getWLByteDelay(pDCTstat, dct, ByteLane, dimm, pass);
++ }
++
++ pDCTData->WLCriticalGrossDelayPrevPass = 0x1f;
++}
++
++void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
++ u8 dct, u8 dimm, u8 pass)
++{
++ u8 ByteLane;
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
++
++ if (is_fam15h()) {
++ int32_t gross_diff[MAX_BYTE_LANES];
++ int32_t cgd = pDCTData->WLCriticalGrossDelayPrevPass;
++ uint8_t index = (uint8_t)(MAX_BYTE_LANES * dimm);
++
++ /* Calculate the Critical Gross Delay */
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ /* Calculate the gross delay differential for this lane */
++ gross_diff[ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane] + pDCTData->WLGrossDelay[index+ByteLane];
++ gross_diff[ByteLane] -= pDCTData->WLSeedPreGrossDelay[index+ByteLane];
++
++ /* WrDqDqsEarly values greater than 2 are reserved */
++ if (gross_diff[ByteLane] < -2)
++ gross_diff[ByteLane] = -2;
++
++ /* Update the Critical Gross Delay */
++ if (gross_diff[ByteLane] < cgd)
++ cgd = gross_diff[ByteLane];
++ }
++
++ pDCTData->WLCriticalGrossDelayPrevPass = cgd;
++
++ /* Compensate for occasional noise/instability causing sporadic training failure */
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ uint16_t total_delay_seed = ((pDCTData->WLSeedGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLSeedFineDelay[index+ByteLane] & 0x1f);
++ uint16_t total_delay_phy = ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f);
++ if (abs(total_delay_phy - total_delay_seed) > 0x20) {
++ printk(BIOS_DEBUG, "%s: overriding faulty phy value\n", __func__);
++ pDCTData->WLGrossDelay[index+ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane];
++ pDCTData->WLFineDelay[index+ByteLane] = pDCTData->WLSeedFineDelay[index+ByteLane];
++ }
++ }
++ }
++}
++
++void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
++ u8 dct, u8 dimm, u8 pass)
++{
++ u8 ByteLane;
++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr;
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
++
++ if (is_fam15h()) {
++ uint32_t dword;
++ int32_t gross_diff[MAX_BYTE_LANES];
++ int32_t cgd = pDCTData->WLCriticalGrossDelayPrevPass;
++ uint8_t index = (uint8_t)(MAX_BYTE_LANES * dimm);
++
++ /* Apply offset(s) if needed */
++ if (cgd < 0) {
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8);
++ dword &= ~(0x3 << 24); /* WrDqDqsEarly = abs(cgd) */
++ dword |= ((abs(cgd) & 0x3) << 24);
++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8, dword);
++
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ /* Calculate the gross delay differential for this lane */
++ gross_diff[ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane] + pDCTData->WLGrossDelay[index+ByteLane];
++ gross_diff[ByteLane] -= pDCTData->WLSeedPreGrossDelay[index+ByteLane];
++
++ /* Prevent underflow in the presence of noise / instability */
++ if (gross_diff[ByteLane] < cgd)
++ gross_diff[ByteLane] = cgd;
++
++ pDCTData->WLGrossDelay[index+ByteLane] = (gross_diff[ByteLane] + (abs(cgd) & 0x3));
++ }
++ } else {
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8);
++ dword &= ~(0x3 << 24); /* WrDqDqsEarly = 0 */
++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8, dword);
++ }
++ }
++
++ /* Write the adjusted gross and fine delay settings
++ * to the target DIMM. */
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ setWLByteDelay(pDCTstat, dct, ByteLane, dimm, 1, pass);
+ }
+
+ /* 6. Configure DRAM Phy Control Register so that the phy stops driving
+ * write levelization ODT. */
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_ADD_DCT_PHY_CONTROL_REG, WrLvOdtEn, WrLvOdtEn, 0);
+
++ if (is_fam15h())
++ proc_MFENCE();
++
+ /* Wait 10 MEMCLKs to allow for ODT signal settling. */
+- pMCTData->AgesaDelay(10);
++ if (is_fam15h())
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 10);
++ else
++ pMCTData->AgesaDelay(10);
+
+ /* 7. Program the target DIMM back to normal operation by configuring
+ * the following (See section 2.8.5.4.1.1
+@@ -155,7 +283,7 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData,
+ * For a two DIMM system, program the Rtt value for the target DIMM
+ * to the normal operating termination:
+ */
+- prepareDimms(pMCTData, pDCTData,dimm,FALSE);
++ prepareDimms(pMCTstat, pDCTstat, dct, dimm, FALSE);
+ }
+
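
AgesaHwWlPhase2() above computes the per-lane gross delay differential as seed gross plus trained gross minus seed pre-gross, clamps each lane at -2 (WrDqDqsEarly values above 2 are reserved) and keeps the minimum as the critical gross delay. A self-contained restatement of that calculation, assuming 0x1f as the initial value carried over from phase 1:

#include <stdint.h>

/*
 * Critical gross delay: per-lane differential, clamped at -2,
 * minimum across all byte lanes.
 */
static int32_t critical_gross_delay(const int32_t *seed_gross,
				    const int32_t *trained_gross,
				    const int32_t *seed_pregross,
				    int lanes)
{
	int32_t cgd = 0x1f;	/* value carried over from phase 1 */
	int lane;

	for (lane = 0; lane < lanes; lane++) {
		int32_t diff = seed_gross[lane] + trained_gross[lane]
				- seed_pregross[lane];
		if (diff < -2)
			diff = -2;	/* WrDqDqsEarly above 2 is reserved */
		if (diff < cgd)
			cgd = diff;
	}

	return cgd;
}
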
+ /*----------------------------------------------------------------------------
+@@ -165,7 +293,7 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData,
+ */
+
+ /*-----------------------------------------------------------------------------
+- * u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue)
++ * u32 swapAddrBits_wl(struct DCTStatStruc *pDCTstat, uint8_t dct, u32 MRSValue)
+ *
+ * Description:
+ * This function swaps the bits in MSR register value
+@@ -177,12 +305,17 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData,
+ *
+ * ----------------------------------------------------------------------------
+ */
+-u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue)
++u32 swapAddrBits_wl(struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t MRSValue)
+ {
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
+ u32 tempW, tempW1;
+
+- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_INIT, MrsChipSelStart, MrsChipSelEnd);
++ if (is_fam15h())
++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam15, MrsChipSelEndFam15);
++ else
++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam10, MrsChipSelEndFam10);
+ if (tempW1 & 1)
+ {
+ if ((pDCTData->Status[DCT_STATUS_OnDimmMirror]))
+@@ -201,7 +334,7 @@ u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue)
+ }
+
+ /*-----------------------------------------------------------------------------
+- * u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue)
++ * u32 swapBankBits(struct DCTStatStruc *pDCTstat, uint8_t dct, u32 MRSValue)
+ *
+ * Description:
+ * This function swaps the bits in MSR register value
+@@ -213,12 +346,17 @@ u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue)
+ *
+ * ----------------------------------------------------------------------------
+ */
+-u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue)
++u32 swapBankBits(struct DCTStatStruc *pDCTstat, uint8_t dct, u32 MRSValue)
+ {
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
+ u32 tempW, tempW1;
+
+- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_INIT, MrsChipSelStart, MrsChipSelEnd);
++ if (is_fam15h())
++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam15, MrsChipSelEndFam15);
++ else
++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam10, MrsChipSelEndFam10);
+ if (tempW1 & 1)
+ {
+ if ((pDCTData->Status[DCT_STATUS_OnDimmMirror]))
+@@ -269,7 +407,7 @@ static uint16_t unbuffered_dimm_nominal_termination_emrs(uint8_t number_of_dimms
+ return term;
+ }
+
+-static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank)
++static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count)
+ {
+ uint16_t term;
+
+@@ -300,27 +438,27 @@ static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms
+ *
+ * Description:
+ * This function prepares DIMMS for training
+- *
+- * Parameters:
+- * IN OUT *DCTData - Pointer to buffer with information about each DCT
+- * *SPDData - Pointer to buffer with information about each DIMMs
+- * SPD information
+- * *MCTData - Pointer to buffer with runtime parameters,
+- * IN Dimm - Logical DIMM number
+- * WL - indicates if the routine is used for Write levelization
+- * training
+- *
+- * OUT
+- *
++ * Fam10h: BKDG Rev. 3.62 section 2.8.9.9.1
++ * Fam15h: BKDG Rev. 3.14 section 2.10.5.8.1
+ * ----------------------------------------------------------------------------
+ */
+-void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
++void prepareDimms(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
++ u8 dct, u8 dimm, BOOL wl)
+ {
+ u32 tempW, tempW1, tempW2, MrsBank;
+ u8 rank, currDimm, MemClkFreq;
++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr;
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
++ uint8_t number_of_dimms = pDCTData->MaxDimmsInstalled;
+
+- MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
++ if (is_fam15h()) {
++ MemClkFreq = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_CONFIG_HIGH, 0, 4);
++ } else {
++ MemClkFreq = get_Bits(pDCTData, dct, pDCTData->NodeId,
+ FUN_DCT, DRAM_CONFIG_HIGH, 0, 2);
++ }
+ /* Configure the DCT to send initialization MR commands to the target DIMM
+ * by programming the F2x[1,0]7C register using the following steps.
+ */
+@@ -328,52 +466,95 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
+ while ((rank < pDCTData->DimmRanks[dimm]) && (rank < 2))
+ {
+ /* Program F2x[1, 0]7C[MrsChipSel[2:0]] for the current rank to be trained. */
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+- DRAM_INIT, MrsChipSelStart, MrsChipSelEnd, dimm*2+rank);
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsChipSelStartFam15, MrsChipSelEndFam15, dimm*2+rank);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsChipSelStartFam10, MrsChipSelEndFam10, dimm*2+rank);
++
+ /* Program F2x[1, 0]7C[MrsBank[2:0]] for the appropriate internal DRAM
+ * register that defines the required DDR3-defined function for write
+ * levelization.
+ */
+- MrsBank = swapBankBits(pDCTData,1);
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+- DRAM_INIT, MrsBankStart, MrsBankEnd, MrsBank);
++ MrsBank = swapBankBits(pDCTstat, dct, 1);
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsBankStartFam15, MrsBankEndFam15, MrsBank);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsBankStartFam10, MrsBankEndFam10, MrsBank);
++
+ /* Program F2x[1, 0]7C[MrsAddress[15:0]] to the required DDR3-defined function
+ * for write levelization.
+ */
+ tempW = 0;/* DLL_DIS = 0, DIC = 0, AL = 0, TDQS = 0 */
+
+- /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */
+- tempW2 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_CONFIG_HIGH, RDqsEn, RDqsEn);
+- if (tempW2)
+- {
+- if (pDCTData->DimmX8Present[dimm])
+- tempW |= 0x800;
++ /* Retrieve normal settings of the MRS control word and clear Rtt_Nom */
++ if (is_fam15h()) {
++ tempW = mct_MR1(pMCTstat, pDCTstat, dct, dimm*2+rank) & 0xffff;
++ tempW &= ~(0x0244);
++ } else {
++ /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */
++ tempW2 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_CONFIG_HIGH, RDqsEn, RDqsEn);
++ if (tempW2)
++ {
++ if (pDCTData->DimmX8Present[dimm])
++ tempW |= 0x800;
++ }
+ }
+
+ /* determine Rtt_Nom for WL & Normal mode */
+- if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+- tempW1 = RttNomTargetRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
+- } else {
++ if (is_fam15h()) {
+ if (wl) {
+- if (rank == 0) {
+- /* Get Rtt_WR for the current DIMM and rank */
+- uint16_t dynamic_term = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
+-
+- /* Convert dynamic termination code to corresponding nominal termination code */
+- if (dynamic_term == 0x200)
+- tempW1 = 0x04;
+- else if (dynamic_term == 0x400)
+- tempW1 = 0x40;
+- else
+- tempW1 = 0x0;
++ if (number_of_dimms > 1) {
++ if (rank == 0) {
++ /* Get Rtt_WR for the current DIMM and rank */
++ tempW2 = fam15_rttwr(pDCTstat, dct, dimm, rank, package_type);
++ } else {
++ tempW2 = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type);
++ }
+ } else {
+- tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
++ tempW2 = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type);
+ }
+ } else {
+- tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
++ tempW2 = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type);
++ }
++ tempW1 = 0;
++ tempW1 |= ((tempW2 & 0x4) >> 2) << 9;
++ tempW1 |= ((tempW2 & 0x2) >> 1) << 6;
++ tempW1 |= ((tempW2 & 0x1) >> 0) << 2;
++ } else {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
++ tempW1 = RttNomTargetRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
++ } else {
++ if (wl) {
++ if (number_of_dimms > 1) {
++ if (rank == 0) {
++ /* Get Rtt_WR for the current DIMM and rank */
++ uint16_t dynamic_term = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm]);
++
++ /* Convert dynamic termination code to corresponding nominal termination code */
++ if (dynamic_term == 0x200)
++ tempW1 = 0x04;
++ else if (dynamic_term == 0x400)
++ tempW1 = 0x40;
++ else
++ tempW1 = 0x0;
++ } else {
++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
++ }
++ } else {
++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
++ }
++ } else {
++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
++ }
+ }
+ }
++
++ /* Apply Rtt_Nom to the MRS control word */
+ tempW=tempW|tempW1;
+
+ /* All ranks of the target DIMM are set to write levelization mode. */
+@@ -393,68 +574,105 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
+ tempW = bitTestSet(tempW1, Qoff);
+ }
+ }
+- /* Program MrsAddress[5,1]=output driver impedance control (DIC):
+- * based on F2x[1,0]84[DrvImpCtrl]
+- */
+- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd);
++
++ /* Program MrsAddress[5,1]=output driver impedance control (DIC) */
++ if (is_fam15h()) {
++ tempW1 = fam15_dimm_dic(pDCTstat, dct, dimm, rank, package_type);
++ } else {
++ /* Read DIC from F2x[1,0]84[DrvImpCtrl] */
++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd);
++ }
++
++ /* Apply DIC to the MRS control word */
+ if (bitTest(tempW1, 1))
+ tempW = bitTestSet(tempW, 5);
+ if (bitTest(tempW1, 0))
+ tempW = bitTestSet(tempW, 1);
+
+- tempW = swapAddrBits_wl(pDCTData, tempW);
++ tempW = swapAddrBits_wl(pDCTstat, dct, tempW);
++
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsAddressStartFam15, MrsAddressEndFam15, tempW);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsAddressStartFam10, MrsAddressEndFam10, tempW);
+
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+- DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW);
+ /* Program F2x[1, 0]7C[SendMrsCmd]=1 to initiate the command to
+ * the specified DIMM.
+ */
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_INIT, SendMrsCmd, SendMrsCmd, 1);
+ /* Wait for F2x[1, 0]7C[SendMrsCmd] to be cleared by hardware. */
+- while ((get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
++ while ((get_Bits(pDCTData, dct, pDCTData->NodeId,
+ FUN_DCT, DRAM_INIT, SendMrsCmd, SendMrsCmd)) == 0x1)
+ {
+ }
++
+ /* Program F2x[1, 0]7C[MrsBank[2:0]] for the appropriate internal DRAM
+ * register that defines the required DDR3-defined function for Rtt_WR.
+ */
+- MrsBank = swapBankBits(pDCTData,2);
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+- DRAM_INIT, MrsBankStart, MrsBankEnd, MrsBank);
++ MrsBank = swapBankBits(pDCTstat, dct, 2);
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsBankStartFam15, MrsBankEndFam15, MrsBank);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsBankStartFam10, MrsBankEndFam10, MrsBank);
++
+ /* Program F2x[1, 0]7C[MrsAddress[15:0]] to the required DDR3-defined function
+ * for Rtt_WR (DRAMTermDyn).
+ */
+ tempW = 0;/* PASR = 0,*/
+- /* program MrsAddress[7,6,5:3]=SRT,ASR,CWL,
+- * based on F2x[1,0]84[19,18,22:20]=,SRT,ASR,Tcwl */
+- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_MRS_REGISTER, PCI_MIN_LOW, PCI_MAX_HIGH);
+- if (bitTest(tempW1,19))
+- {tempW = bitTestSet(tempW, 7);}
+- if (bitTest(tempW1,18))
+- {tempW = bitTestSet(tempW, 6);}
+- /* tempW=tempW|(((tempW1>>20)&0x7)<<3); */
+- tempW=tempW|((tempW1&0x00700000)>>17);
+- /* workaround for DR-B0 */
+- if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
+- tempW+=0x8;
++
++ /* Retrieve normal settings of the MRS control word and clear Rtt_WR */
++ if (is_fam15h()) {
++ tempW = mct_MR2(pMCTstat, pDCTstat, dct, dimm*2+rank) & 0xffff;
++ tempW &= ~(0x0600);
++ } else {
++ /* program MrsAddress[7,6,5:3]=SRT,ASR,CWL,
++ * based on F2x[1,0]84[19,18,22:20]=,SRT,ASR,Tcwl */
++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_MRS_REGISTER, PCI_MIN_LOW, PCI_MAX_HIGH);
++ if (bitTest(tempW1,19))
++ {tempW = bitTestSet(tempW, 7);}
++ if (bitTest(tempW1,18))
++ {tempW = bitTestSet(tempW, 6);}
++ /* tempW=tempW|(((tempW1>>20)&0x7)<<3); */
++ tempW=tempW|((tempW1&0x00700000)>>17);
++ /* workaround for DR-B0 */
++ if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
++ tempW+=0x8;
++ }
++
+ /* determine Rtt_WR for WL & Normal mode */
+- if (pDCTData->Status[DCT_STATUS_REGISTERED])
+- tempW1 = RttWrRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
+- else
+- tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
++ if (is_fam15h()) {
++ tempW1 = (fam15_rttwr(pDCTstat, dct, dimm, rank, package_type) << 9);
++ } else {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED])
++ tempW1 = RttWrRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
++ else
++ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm]);
++ }
++
++ /* Apply Rtt_WR to the MRS control word */
+ tempW=tempW|tempW1;
+- tempW = swapAddrBits_wl(pDCTData,tempW);
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+- DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW);
++ tempW = swapAddrBits_wl(pDCTstat, dct, tempW);
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsAddressStartFam15, MrsAddressEndFam15, tempW);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsAddressStartFam10, MrsAddressEndFam10, tempW);
++
+ /* Program F2x[1, 0]7C[SendMrsCmd]=1 to initiate the command to
+ the specified DIMM.*/
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_INIT, SendMrsCmd, SendMrsCmd, 1);
++
+ /* Wait for F2x[1, 0]7C[SendMrsCmd] to be cleared by hardware. */
+- while ((get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
++ while ((get_Bits(pDCTData, dct, pDCTData->NodeId,
+ FUN_DCT, DRAM_INIT, SendMrsCmd, SendMrsCmd)) == 0x1)
+ {
+ }
+@@ -473,97 +691,163 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
+ rank = 0;
+ while ((rank < pDCTData->DimmRanks[currDimm]) && (rank < 2))
+ {
+-
+ /* Program F2x[1, 0]7C[MrsChipSel[2:0]] for the current rank
+ * to be trained.
+ */
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_INIT, MrsChipSelStart, MrsChipSelEnd, currDimm*2+rank);
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam15, MrsChipSelEndFam15, currDimm*2+rank);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam10, MrsChipSelEndFam10, currDimm*2+rank);
++
+ /* Program F2x[1, 0]7C[MrsBank[2:0]] for the appropriate internal
+ * DRAM register that defines the required DDR3-defined function
+ * for write levelization.
+ */
+- MrsBank = swapBankBits(pDCTData,1);
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_INIT, MrsBankStart, MrsBankEnd, MrsBank);
++ MrsBank = swapBankBits(pDCTstat, dct, 1);
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsBankStartFam15, MrsBankEndFam15, MrsBank);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsBankStartFam10, MrsBankEndFam10, MrsBank);
++
+ /* Program F2x[1, 0]7C[MrsAddress[15:0]] to the required
+ * DDR3-defined function for write levelization.
+ */
+ tempW = 0;/* DLL_DIS = 0, DIC = 0, AL = 0, TDQS = 0, Level=0, Qoff=0 */
+
+- /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */
+- tempW2 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_CONFIG_HIGH, RDqsEn, RDqsEn);
+- if (tempW2)
+- {
+- if (pDCTData->DimmX8Present[currDimm])
+- tempW |= 0x800;
++ /* Retrieve normal settings of the MRS control word and clear Rtt_Nom */
++ if (is_fam15h()) {
++ tempW = mct_MR1(pMCTstat, pDCTstat, dct, dimm*2+rank) & 0xffff;
++ tempW &= ~(0x0244);
++ } else {
++ /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */
++ tempW2 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_CONFIG_HIGH, RDqsEn, RDqsEn);
++ if (tempW2)
++ {
++ if (pDCTData->DimmX8Present[currDimm])
++ tempW |= 0x800;
++ }
+ }
+
+ /* determine Rtt_Nom for WL & Normal mode */
+- if (pDCTData->Status[DCT_STATUS_REGISTERED])
+- tempW1 = RttNomNonTargetRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
+- else
+- tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
++ if (is_fam15h()) {
++ tempW2 = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type);
++ tempW1 = 0;
++ tempW1 |= ((tempW2 & 0x4) >> 2) << 9;
++ tempW1 |= ((tempW2 & 0x2) >> 1) << 6;
++ tempW1 |= ((tempW2 & 0x1) >> 0) << 2;
++ } else {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED])
++ tempW1 = RttNomNonTargetRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
++ else
++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
++ }
++
++ /* Apply Rtt_Nom to the MRS control word */
+ tempW=tempW|tempW1;
+- /* program MrsAddress[5,1]=output driver impedance control (DIC):
+- * based on F2x[1,0]84[DrvImpCtrl] */
+- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd);
++
++ /* Program MrsAddress[5,1]=output driver impedance control (DIC) */
++ if (is_fam15h()) {
++ tempW1 = fam15_dimm_dic(pDCTstat, dct, dimm, rank, package_type);
++ } else {
++ /* Read DIC from F2x[1,0]84[DrvImpCtrl] */
++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd);
++ }
++
++ /* Apply DIC to the MRS control word */
+ if (bitTest(tempW1,1))
+ {tempW = bitTestSet(tempW, 5);}
+ if (bitTest(tempW1,0))
+ {tempW = bitTestSet(tempW, 1);}
+- tempW = swapAddrBits_wl(pDCTData,tempW);
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW);
++
++ tempW = swapAddrBits_wl(pDCTstat, dct, tempW);
++
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsAddressStartFam15, MrsAddressEndFam15, tempW);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_INIT, MrsAddressStartFam10, MrsAddressEndFam10, tempW);
++
+ /* Program F2x[1, 0]7C[SendMrsCmd]=1 to initiate the command
+ * to the specified DIMM.
+ */
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
++ set_Bits(pDCTData, dct, pDCTData->NodeId,
+ FUN_DCT, DRAM_INIT, SendMrsCmd, SendMrsCmd, 1);
++
+ /* Wait for F2x[1, 0]7C[SendMrsCmd] to be cleared by hardware. */
+- while ((get_Bits(pDCTData, pDCTData->CurrDct,
++ while ((get_Bits(pDCTData, dct,
+ pDCTData->NodeId, FUN_DCT, DRAM_INIT,
+ SendMrsCmd, SendMrsCmd)) == 1);
++
+ /* Program F2x[1, 0]7C[MrsBank[2:0]] for the appropriate internal DRAM
+ * register that defines the required DDR3-defined function for Rtt_WR.
+ */
+- MrsBank = swapBankBits(pDCTData,2);
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+- DRAM_INIT, MrsBankStart, MrsBankEnd, MrsBank);
++ MrsBank = swapBankBits(pDCTstat, dct, 2);
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsBankStartFam15, MrsBankEndFam15, MrsBank);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsBankStartFam10, MrsBankEndFam10, MrsBank);
++
+ /* Program F2x[1, 0]7C[MrsAddress[15:0]] to the required DDR3-defined function
+ * for Rtt_WR (DRAMTermDyn).
+ */
+ tempW = 0;/* PASR = 0,*/
+- /* program MrsAddress[7,6,5:3]=SRT,ASR,CWL,
+- * based on F2x[1,0]84[19,18,22:20]=,SRT,ASR,Tcwl */
+- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_MRS_REGISTER, PCI_MIN_LOW, PCI_MAX_HIGH);
+- if (bitTest(tempW1,19))
+- {tempW = bitTestSet(tempW, 7);}
+- if (bitTest(tempW1,18))
+- {tempW = bitTestSet(tempW, 6);}
+- /* tempW=tempW|(((tempW1>>20)&0x7)<<3); */
+- tempW=tempW|((tempW1&0x00700000)>>17);
+- /* workaround for DR-B0 */
+- if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
+- tempW+=0x8;
++
++ /* Retrieve normal settings of the MRS control word and clear Rtt_WR */
++ if (is_fam15h()) {
++ tempW = mct_MR2(pMCTstat, pDCTstat, dct, dimm*2+rank) & 0xffff;
++ tempW &= ~(0x0600);
++ } else {
++ /* program MrsAddress[7,6,5:3]=SRT,ASR,CWL,
++ * based on F2x[1,0]84[19,18,22:20]=,SRT,ASR,Tcwl */
++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_MRS_REGISTER, PCI_MIN_LOW, PCI_MAX_HIGH);
++ if (bitTest(tempW1,19))
++ {tempW = bitTestSet(tempW, 7);}
++ if (bitTest(tempW1,18))
++ {tempW = bitTestSet(tempW, 6);}
++ /* tempW=tempW|(((tempW1>>20)&0x7)<<3); */
++ tempW=tempW|((tempW1&0x00700000)>>17);
++ /* workaround for DR-B0 */
++ if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
++ tempW+=0x8;
++ }
++
+ /* determine Rtt_WR for WL & Normal mode */
+- if (pDCTData->Status[DCT_STATUS_REGISTERED])
+- tempW1 = RttWrRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
+- else
+- tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
++ if (is_fam15h()) {
++ tempW1 = (fam15_rttwr(pDCTstat, dct, dimm, rank, package_type) << 9);
++ } else {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED])
++ tempW1 = RttWrRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
++ else
++ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm]);
++ }
++
++ /* Apply Rtt_WR to the MRS control word */
+ tempW=tempW|tempW1;
+- tempW = swapAddrBits_wl(pDCTData,tempW);
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+- DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW);
++ tempW = swapAddrBits_wl(pDCTstat, dct, tempW);
++ if (is_fam15h())
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsAddressStartFam15, MrsAddressEndFam15, tempW);
++ else
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
++ DRAM_INIT, MrsAddressStartFam10, MrsAddressEndFam10, tempW);
++
+ /* Program F2x[1, 0]7C[SendMrsCmd]=1 to initiate the command to
+ the specified DIMM.*/
+- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_INIT, SendMrsCmd, SendMrsCmd, 1);
++
+ /* Wait for F2x[1, 0]7C[SendMrsCmd] to be cleared by hardware. */
+- while ((get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
++ while ((get_Bits(pDCTData, dct, pDCTData->NodeId,
+ FUN_DCT, DRAM_INIT, SendMrsCmd, SendMrsCmd)) == 0x1)
+ {
+ }
+@@ -587,29 +871,60 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
+ * OUT
+ * ----------------------------------------------------------------------------
+ */
+-void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm)
++void programODT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm)
+ {
++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr;
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
++
+ u8 WrLvOdt1=0;
+
+- if (pDCTData->Status[DCT_STATUS_REGISTERED] == 0) {
+- if ((pDCTData->DctCSPresent & 0x05) == 0x05) {
+- WrLvOdt1 = 0x03;
+- } else if (bitTest((u32)pDCTData->DctCSPresent,(u8)(dimm*2+1))) {
+- WrLvOdt1 = (u8)bitTestSet(WrLvOdt1, dimm+2);
++ if (is_fam15h()) {
++ /* Convert DIMM number to CS */
++ uint32_t dword;
++ uint8_t cs;
++ uint8_t rank = 0;
++
++ cs = (dimm * 2) + rank;
++
++ /* Fetch preprogrammed ODT pattern from configuration registers */
++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, ((cs>3)?0x23c:0x238));
++ if ((cs == 7) || (cs == 3))
++ WrLvOdt1 = ((dword >> 24) & 0xf);
++ else if ((cs == 6) || (cs == 2))
++ WrLvOdt1 = ((dword >> 16) & 0xf);
++ else if ((cs == 5) || (cs == 1))
++ WrLvOdt1 = ((dword >> 8) & 0xf);
++ else if ((cs == 4) || (cs == 0))
++ WrLvOdt1 = (dword & 0xf);
++ } else {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED] == 0) {
++ if ((pDCTData->DctCSPresent & 0x05) == 0x05) {
++ WrLvOdt1 = 0x03;
++ } else if (bitTest((u32)pDCTData->DctCSPresent,(u8)(dimm*2+1))) {
++ WrLvOdt1 = (u8)bitTestSet(WrLvOdt1, dimm+2);
++ } else {
++ WrLvOdt1 = (u8)bitTestSet(WrLvOdt1, dimm);
++ }
+ } else {
+- WrLvOdt1 = (u8)bitTestSet(WrLvOdt1, dimm);
++ WrLvOdt1 = WrLvOdtRegDimm(pMCTData, pDCTData, dimm);
+ }
+- } else {
+- WrLvOdt1 = WrLvOdtRegDimm(pMCTData, pDCTData, dimm);
+ }
+
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_ADD_DCT_PHY_CONTROL_REG, 8, 11, (u32)WrLvOdt1);
+
+ }
+
++#ifdef UNUSED_CODE
++static uint16_t fam15h_next_lowest_memclk_freq(uint16_t memclk_freq)
++{
++ uint16_t fam15h_next_lowest_freq_tab[] = {0, 0, 0, 0, 0x4, 0, 0x4, 0, 0, 0, 0x6, 0, 0, 0, 0xa, 0, 0, 0, 0xe, 0, 0, 0, 0x12};
++ return fam15h_next_lowest_freq_tab[memclk_freq];
++}
++#endif
++
+ /*-----------------------------------------------------------------------------
+- * void procConifg(MCTStruct *MCTData,DCTStruct *DCTData, u8 Dimm, u8 Pass)
++ * void procConfig(MCTStruct *MCTData,DCTStruct *DCTData, u8 Dimm, u8 Pass)
+ *
+ * Description:
+ * This function programs the ODT values for the NB
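
On Fam15h, programODT() above no longer derives the write-levelization ODT pattern from DctCSPresent; it reads the preprogrammed pattern registers, where chip selects 0 to 3 are packed four bits apiece into the register at 0x238 and chip selects 4 to 7 into 0x23c. A small sketch of that lookup (the helper name is ours):

#include <stdint.h>

/*
 * Locate the 4-bit ODT pattern field for a chip select: CS 0-3 live
 * in the register at 0x238, CS 4-7 in the one at 0x23c, shifted by
 * 8 bits per chip select; the caller takes (value >> shift) & 0xf.
 */
static void odt_pattern_location(uint8_t cs, uint16_t *reg, uint8_t *shift)
{
	*reg = (cs > 3) ? 0x23c : 0x238;
	*shift = (cs % 4) * 8;
}
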
+@@ -622,31 +937,43 @@ void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm)
+ * OUT
+ * ----------------------------------------------------------------------------
+ */
+-void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
++void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm, u8 pass)
+ {
+- u8 ByteLane, Seed_Gross, Seed_Fine, MemClkFreq;
++ u8 ByteLane, MemClkFreq;
++ int32_t Seed_Gross;
++ int32_t Seed_Fine;
++ uint8_t Seed_PreGross;
+ u32 Value, Addr;
+ u16 Addl_Data_Offset, Addl_Data_Port;
+- u16 freq_tab[] = {400, 533, 667, 800};
++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr;
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
++ u16 fam10h_freq_tab[] = {400, 533, 667, 800};
++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
+
+- /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */
+- MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_CONFIG_HIGH, 0, 2);
++ if (is_fam15h()) {
++ /* MemClkFreq: 0x4: 333MHz; 0x6: 400MHz; 0xa: 533MHz; 0xe: 667MHz; 0x12: 800MHz; 0x16: 933MHz */
++ MemClkFreq = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_CONFIG_HIGH, 0, 4);
++ } else {
++ /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */
++ MemClkFreq = get_Bits(pDCTData, dct, pDCTData->NodeId,
++ FUN_DCT, DRAM_CONFIG_HIGH, 0, 2);
++ }
+
+ /* Program F2x[1, 0]9C_x08[WrLvOdt[3:0]] to the proper ODT settings for the
+ * current memory subsystem configuration.
+ */
+- programODT(pMCTData, pDCTData, dimm);
++ programODT(pMCTstat, pDCTstat, dct, dimm);
+
+ /* Program F2x[1,0]9C_x08[WrLvOdtEn]=1 */
+- if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) {
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx | AMD_FAM15_ALL)) {
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_ADD_DCT_PHY_CONTROL_REG, WrLvOdtEn, WrLvOdtEn, (u32)1);
+ }
+ else
+ {
+ /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B */
+- if (pDCTData->DctTrain)
++ if (dct)
+ {
+ Addl_Data_Offset=0x198;
+ Addl_Data_Port=0x19C;
+@@ -669,33 +996,94 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
+ DctAccessDone, DctAccessDone)) == 0);
+ }
+
++ if (is_fam15h())
++ proc_MFENCE();
++
+ /* Wait 10 MEMCLKs to allow for ODT signal settling. */
+- pMCTData->AgesaDelay(10);
++ if (is_fam15h())
++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 10);
++ else
++ pMCTData->AgesaDelay(10);
++
++ /* Program write levelling seed values */
+ if (pass == 1)
+ {
+- if (pDCTData->Status[DCT_STATUS_REGISTERED])
+- {
+- if(pDCTData->RegMan1Present & ((1<<(dimm*2+pDCTData->DctTrain))))
++ /* Pass 1 */
++ if (is_fam15h()) {
++ uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
++ uint16_t Seed_Total = 0;
++ if (package_type == PT_GR) {
++ /* Socket G34: Fam15h BKDG v3.14 Table 96 */
++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
++ Seed_Total = 0x41;
++ } else if (pDCTData->Status[DCT_STATUS_LOAD_REDUCED]) {
++ Seed_Total = 0x0;
++ } else {
++ Seed_Total = 0xf;
++ }
++ } else if (package_type == PT_C3) {
++ /* Socket C32: Fam15h BKDG v3.14 Table 97 */
++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
++ Seed_Total = 0x3e;
++ } else if (pDCTData->Status[DCT_STATUS_LOAD_REDUCED]) {
++ Seed_Total = 0x0;
++ } else {
++ Seed_Total = 0x12;
++ }
++ } else if (package_type == PT_M2) {
++ /* Socket AM3: Fam15h BKDG v3.14 Table 98 */
++ Seed_Total = 0xf;
++ }
++ if (pDCTData->Status[DCT_STATUS_REGISTERED])
++ Seed_Total += ((AddrCmdPrelaunch)?0x10:0x0);
++
++ /* Adjust seed for the minimum platform supported frequency */
++ Seed_Total = (int32_t) (((((int64_t) Seed_Total) *
++ fam15h_freq_tab[MemClkFreq] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100)));
++
++ Seed_Gross = (Seed_Total >> 5) & 0x1f;
++ Seed_Fine = Seed_Total & 0x1f;
++
++ /* Save seed values for later use */
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
++ pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++
++ if (Seed_Gross == 0)
++ Seed_PreGross = 0;
++ else if (Seed_Gross & 0x1)
++ Seed_PreGross = 1;
++ else
++ Seed_PreGross = 2;
++
++ pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross;
++ }
++ } else {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED])
+ {
+- Seed_Gross = 0x02;
+- Seed_Fine = 0x16;
++ if(pDCTData->RegMan1Present & ((1<<(dimm*2+dct))))
++ {
++ Seed_Gross = 0x02;
++ Seed_Fine = 0x16;
++ }
++ else
++ {
++ Seed_Gross = 0x02;
++ Seed_Fine = 0x00;
++ }
+ }
+ else
+ {
+- Seed_Gross = 0x02;
+- Seed_Fine = 0x00;
+- }
+- }
+- else
+- {
+- if (MemClkFreq == 6) {
+- /* DDR-800 */
+- Seed_Gross = 0x00;
+- Seed_Fine = 0x1a;
+- } else {
+- /* Use settings for DDR-400 (interpolated from BKDG) */
+- Seed_Gross = 0x00;
+- Seed_Fine = 0x0d;
++ if (MemClkFreq == 6) {
++ /* DDR-800 */
++ Seed_Gross = 0x00;
++ Seed_Fine = 0x1a;
++ } else {
++ /* Use settings for DDR-400 (interpolated from BKDG) */
++ Seed_Gross = 0x00;
++ Seed_Fine = 0x0d;
++ }
+ }
+ }
+ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
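
In the Fam15h pass 1 branch above, the per-package seed is scaled from the minimum platform MEMCLK to the current MEMCLK and then split into 5-bit gross and fine parts, with the pre-gross value set to 0, 1 or 2 by the parity of the gross part. A compact restatement of the split, assuming the scaled total already fits in ten bits:

#include <stdint.h>

/*
 * Split a scaled write levelization seed into its 5-bit gross and
 * fine parts and derive the pre-gross value (0 if gross is zero,
 * 1 if odd, 2 if even).
 */
static void split_wl_seed(uint16_t seed_total, uint8_t *gross,
			  uint8_t *fine, uint8_t *pregross)
{
	*gross = (seed_total >> 5) & 0x1f;
	*fine = seed_total & 0x1f;

	if (*gross == 0)
		*pregross = 0;
	else if (*gross & 0x1)
		*pregross = 1;
	else
		*pregross = 2;
}
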
+@@ -711,39 +1099,91 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
+ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
+ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
+ }
+- } else { /* Pass 2 */
++ } else {
++ /* Pass 2 */
+ /* From BKDG, Write Leveling Seed Value. */
+- u32 RegisterDelay, SeedTotal;
+- for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
+- {
+- if (pDCTData->Status[DCT_STATUS_REGISTERED])
+- RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 0x20 : 0x30; */
+- else
+- RegisterDelay = 0;
+- SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
+- (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5);
+- /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization
+- training) - RegisterDelay. */
+- SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) *
+- freq_tab[MemClkFreq-3] * 100) / (freq_tab[0] * 100)));
+- Seed_Gross = SeedTotal / 32;
+- Seed_Fine = SeedTotal & 0x1f;
+- if (Seed_Gross == 0)
+- Seed_Gross = 0;
+- else if (Seed_Gross & 0x1)
+- Seed_Gross = 1;
+- else
+- Seed_Gross = 2;
+- pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
+- pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++ if (is_fam15h()) {
++ uint32_t RegisterDelay;
++ int32_t SeedTotal;
++ int32_t SeedTotalPreScaling;
++ uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */
++
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
++ if (AddrCmdPrelaunch)
++ RegisterDelay = 0x30;
++ else
++ RegisterDelay = 0x20;
++ } else {
++ RegisterDelay = 0;
++ }
++ /* Retrieve WrDqDqsEarly */
++ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId), FUN_DCT, 0xa8), 25, 24, &Value);
++
++ /* Calculate adjusted seed values */
++ SeedTotal = (pDCTData->WLFineDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
++ ((pDCTData->WLGrossDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5);
++ SeedTotalPreScaling = (SeedTotal - RegisterDelay - (0x20 * Value));
++ SeedTotal = (int32_t) (RegisterDelay + ((((int64_t) SeedTotalPreScaling) *
++ fam15h_freq_tab[MemClkFreq] * 100) / (fam15h_freq_tab[pDCTData->WLPrevMemclkFreq] * 100)));
++
++ if (SeedTotal >= 0) {
++ Seed_Gross = SeedTotal / 32;
++ Seed_Fine = SeedTotal % 32;
++ } else {
++ Seed_Gross = (SeedTotal / 32) - 1;
++ Seed_Fine = (SeedTotal % 32) + 32;
++ }
++
++ if (Seed_Gross == 0)
++ Seed_PreGross = 0;
++ else if (Seed_Gross & 0x1)
++ Seed_PreGross = 1;
++ else
++ Seed_PreGross = 2;
++
++ /* Save seed values for later use */
++ pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
++ pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++ pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross;
++
++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross;
++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++ }
++ } else {
++ u32 RegisterDelay, SeedTotal;
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
++ {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED])
++ RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 0x20 : 0x30; */
++ else
++ RegisterDelay = 0;
++ SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
++ (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5);
++ /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization
++ training) - RegisterDelay. */
++ SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) *
++ fam10h_freq_tab[MemClkFreq-3] * 100) / (fam10h_freq_tab[0] * 100)));
++ Seed_Gross = SeedTotal / 32;
++ Seed_Fine = SeedTotal & 0x1f;
++ if (Seed_Gross == 0)
++ Seed_Gross = 0;
++ else if (Seed_Gross & 0x1)
++ Seed_Gross = 1;
++ else
++ Seed_Gross = 2;
++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
++ }
+ }
+ }
+
+- setWLByteDelay(pDCTData, ByteLane, dimm, 0);
++ pDCTData->WLPrevMemclkFreq = MemClkFreq;
++ setWLByteDelay(pDCTstat, dct, ByteLane, dimm, 0, pass);
+ }
+
+ /*-----------------------------------------------------------------------------
+- * void setWLByteDelay(DCTStruct *DCTData, u8 ByteLane, u8 Dimm){
++ * void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 Dimm){
+ *
+ * Description:
+ * This function writes the write levelization byte delay for the Phase
+@@ -763,8 +1203,9 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
+ *
+ *-----------------------------------------------------------------------------
+ */
+-void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr)
++void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, u8 targetAddr, uint8_t pass)
+ {
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
+ u8 fineStartLoc, fineEndLoc, grossStartLoc, grossEndLoc, tempB, index, offsetAddr;
+ u32 addr, fineDelayValue, grossDelayValue, ValueLow, ValueHigh, EccValue, tempW;
+
+@@ -777,22 +1218,26 @@ void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr)
+ EccValue = 0;
+ while (ByteLane < MAX_BYTE_LANES)
+ {
+- /* This subtract 0xC workaround might be temporary. */
+- if ((pDCTData->WLPass==2) && (pDCTData->RegMan1Present & (1<<(dimm*2+pDCTData->DctTrain))))
+- {
+- tempW = (pDCTData->WLGrossDelay[index+ByteLane] << 5) | pDCTData->WLFineDelay[index+ByteLane];
+- tempW -= 0xC;
+- pDCTData->WLGrossDelay[index+ByteLane] = (u8)(tempW >> 5);
+- pDCTData->WLFineDelay[index+ByteLane] = (u8)(tempW & 0x1F);
+- }
+- grossDelayValue = pDCTData->WLGrossDelay[index+ByteLane];
+- /* Adjust seed gross delay overflow (greater than 3):
+- * - Program seed gross delay as 2 (gross is 4 or 6) or 1 (gross is 5).
+- * - Keep original seed gross delay for later reference.
+- */
+- if(grossDelayValue >= 3)
+- {
+- grossDelayValue = (grossDelayValue&1)? 1 : 2;
++ if (is_fam15h()) {
++ grossDelayValue = pDCTData->WLGrossDelay[index+ByteLane];
++ } else {
++ /* This subtract 0xC workaround might be temporary. */
++ if ((pDCTData->WLPass==2) && (pDCTData->RegMan1Present & (1<<(dimm*2+dct))))
++ {
++ tempW = (pDCTData->WLGrossDelay[index+ByteLane] << 5) | pDCTData->WLFineDelay[index+ByteLane];
++ tempW -= 0xC;
++ pDCTData->WLGrossDelay[index+ByteLane] = (u8)(tempW >> 5);
++ pDCTData->WLFineDelay[index+ByteLane] = (u8)(tempW & 0x1F);
++ }
++ grossDelayValue = pDCTData->WLGrossDelay[index+ByteLane];
++ /* Adjust seed gross delay overflow (greater than 3):
++ * - Program seed gross delay as 2 (gross is 4 or 6) or 1 (gross is 5).
++ * - Keep original seed gross delay for later reference.
++ */
++ if(grossDelayValue >= 3)
++ {
++ grossDelayValue = (grossDelayValue&1)? 1 : 2;
++ }
+ }
+ fineDelayValue = pDCTData->WLFineDelay[index+ByteLane];
+ if (ByteLane < 4)
+@@ -803,15 +1248,16 @@ void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr)
+ EccValue = ((grossDelayValue << 5) | fineDelayValue);
+ ByteLane++;
+ }
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_CONT_ADD_PHASE_REC_CTRL_LOW, 0, 31, (u32)ValueLow);
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_CONT_ADD_PHASE_REC_CTRL_HIGH, 0, 31, (u32)ValueHigh);
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ DRAM_CONT_ADD_ECC_PHASE_REC_CTRL, 0, 31, (u32)EccValue);
+ }
+ else
+ {
++ /* Fam10h BKDG Rev. 3.62 2.8.9.9.1 (6) */
+ index = (u8)(MAX_BYTE_LANES * dimm);
+ grossDelayValue = pDCTData->WLGrossDelay[index+ByteLane];
+ fineDelayValue = pDCTData->WLFineDelay[index+ByteLane];
+@@ -841,16 +1287,24 @@ void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr)
+ grossStartLoc = (u8)(fineEndLoc + 1);
+ grossEndLoc = (u8)(grossStartLoc + 1);
+
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ (u16)addr, fineStartLoc, fineEndLoc,(u32)fineDelayValue);
+- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
+ (u16)addr, grossStartLoc, grossEndLoc, (u32)grossDelayValue);
++
++ pDCTData->WLFineDelayPrevPass[index+ByteLane] = fineDelayValue;
++ pDCTData->WLGrossDelayPrevPass[index+ByteLane] = grossDelayValue;
++ if (pass == FirstPass) {
++ pDCTData->WLFineDelayFirstPass[index+ByteLane] = fineDelayValue;
++ pDCTData->WLGrossDelayFirstPass[index+ByteLane] = grossDelayValue;
++ pDCTData->WLCriticalGrossDelayFirstPass = pDCTData->WLCriticalGrossDelayPrevPass;
++ }
+ }
+
+ }
+
+ /*-----------------------------------------------------------------------------
+- * void getWLByteDelay(DCTStruct *DCTData, u8 ByteLane, u8 Dimm)
++ * void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 Dimm)
+ *
+ * Description:
+ * This function reads the write levelization byte delay from the Phase
+@@ -868,8 +1322,9 @@ void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr)
+ *
+ *-----------------------------------------------------------------------------
+ */
+-void getWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm)
++void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass)
+ {
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
+ u8 fineStartLoc, fineEndLoc, grossStartLoc, grossEndLoc, tempB, tempB1, index;
+ u32 addr, fine, gross;
+ tempB = 0;
+@@ -890,25 +1345,31 @@ void getWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm)
+ grossStartLoc = (u8)(fineEndLoc + 1);
+ grossEndLoc = (u8)(grossStartLoc + 1);
+
+- fine = get_ADD_DCT_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId,
++ fine = get_ADD_DCT_Bits(pDCTData, dct, pDCTData->NodeId,
+ FUN_DCT, (u16)addr, fineStartLoc, fineEndLoc);
+- gross = get_ADD_DCT_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId,
++ gross = get_ADD_DCT_Bits(pDCTData, dct, pDCTData->NodeId,
+ FUN_DCT, (u16)addr, grossStartLoc, grossEndLoc);
+- /* Adjust seed gross delay overflow (greater than 3):
+- * - Adjust the trained gross delay to the original seed gross delay.
+- */
+- if (pDCTData->WLGrossDelay[index+ByteLane] >= 3) {
+- gross += pDCTData->WLGrossDelay[index+ByteLane];
+- if(pDCTData->WLGrossDelay[index+ByteLane] & 1)
+- gross -= 1;
+- else
+- gross -= 2;
+- } else if ((pDCTData->WLGrossDelay[index+ByteLane] == 0) && (gross == 3)) {
+- /* If seed gross delay is 0 but PRE result gross delay is 3, it is negative.
+- * We will then round the negative number to 0.
++
++ if (!is_fam15h()) {
++ /* Adjust seed gross delay overflow (greater than 3):
++ * - Adjust the trained gross delay to the original seed gross delay.
+ */
+- gross = 0;
+- fine = 0;
++ if(pDCTData->WLGrossDelay[index+ByteLane] >= 3)
++ {
++ gross += pDCTData->WLGrossDelay[index+ByteLane];
++ if(pDCTData->WLGrossDelay[index+ByteLane] & 1)
++ gross -= 1;
++ else
++ gross -= 2;
++ }
++ else if((pDCTData->WLGrossDelay[index+ByteLane] == 0) && (gross == 3))
++ {
++ /* If seed gross delay is 0 but PRE result gross delay is 3, it is negative.
++ * We will then round the negative number to 0.
++ */
++ gross = 0;
++ fine = 0;
++ }
+ }
+ pDCTData->WLFineDelay[index+ByteLane] = (u8)fine;
+ pDCTData->WLGrossDelay[index+ByteLane] = (u8)gross;
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c
+index 0466c77..cf6afaa 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -134,24 +135,48 @@ static u32 get_Bits(sDCTStruct *pDCTData,
+ u16 offset, u8 low, u8 high)
+ {
+ u32 temp;
++ uint32_t dword;
++
+ /* ASSERT(node < MAX_NODES); */
+ if (dct == BOTH_DCTS)
+ {
+ /* Registers exist on DCT0 only */
++ if (is_fam15h())
++ {
++ /* Select DCT 0 */
++ AmdMemPCIRead(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword);
++ dword &= ~0x1;
++ AmdMemPCIWrite(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword);
++ }
++
+ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
+ }
+ else
+ {
+- if (dct == 1)
++ if (is_fam15h())
+ {
+- /* Write to dct 1 */
+- offset += 0x100;
++ /* Select DCT */
++ AmdMemPCIRead(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword);
++ dword &= ~0x1;
++ dword |= (dct & 0x1);
++ AmdMemPCIWrite(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword);
++
++ /* Read from the selected DCT */
+ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
+ }
+ else
+ {
+- /* Write to dct 0 */
+- AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
++ if (dct == 1)
++ {
++ /* Read from dct 1 */
++ offset += 0x100;
++ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
++ }
++ else
++ {
++ /* Read from dct 0 */
++ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
++ }
+ }
+ }
+ return temp;
+@@ -184,25 +209,49 @@ static void set_Bits(sDCTStruct *pDCTData,
+ u16 offset, u8 low, u8 high, u32 value)
+ {
+ u32 temp;
++ uint32_t dword;
++
+ temp = value;
+
+ if (dct == BOTH_DCTS)
+ {
+ /* Registers exist on DCT0 only */
++ if (is_fam15h())
++ {
++ /* Select DCT 0 */
++ AmdMemPCIRead(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword);
++ dword &= ~0x1;
++ AmdMemPCIWrite(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword);
++ }
++
+ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
+ }
+ else
+ {
+- if (dct == 1)
++ if (is_fam15h())
+ {
+- /* Write to dct 1 */
+- offset += 0x100;
++ /* Select DCT */
++ AmdMemPCIRead(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword);
++ dword &= ~0x1;
++ dword |= (dct & 0x1);
++ AmdMemPCIWrite(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword);
++
++ /* Write to the selected DCT */
+ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
+ }
+ else
+ {
+- /* Write to dct 0 */
+- AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
++ if (dct == 1)
++ {
++ /* Write to dct 1 */
++ offset += 0x100;
++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
++ }
++ else
++ {
++ /* Write to dct 0 */
++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp);
++ }
+ }
+ }
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h
+index f846d87..162340e 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -33,7 +34,8 @@
+ #define C_MAX_DIMMS 4 /* Maximum Number of DIMMs on each DCT */
+
+ /* STATUS Definition */
+-#define DCT_STATUS_REGISTERED 3 /* Registered DIMMs support */
++#define DCT_STATUS_REGISTERED 3 /* Registered DIMMs support */
++#define DCT_STATUS_LOAD_REDUCED 4 /* Load-Reduced DIMMs support */
+ #define DCT_STATUS_OnDimmMirror 24 /* OnDimmMirror support */
+
+ /* PCI Defintions */
+@@ -78,12 +80,18 @@
+ #define SendMrsCmd 26
+ #define Qoff 12
+ #define MRS_Level 7
+-#define MrsAddressStart 0
+-#define MrsAddressEnd 15
+-#define MrsBankStart 16
+-#define MrsBankEnd 18
+-#define MrsChipSelStart 20
+-#define MrsChipSelEnd 22
++#define MrsAddressStartFam10 0
++#define MrsAddressEndFam10 15
++#define MrsAddressStartFam15 0
++#define MrsAddressEndFam15 17
++#define MrsBankStartFam10 16
++#define MrsBankEndFam10 18
++#define MrsBankStartFam15 18
++#define MrsBankEndFam15 20
++#define MrsChipSelStartFam10 20
++#define MrsChipSelEndFam10 22
++#define MrsChipSelStartFam15 21
++#define MrsChipSelEndFam15 23
+ #define ASR 18
+ #define SRT 19
+ #define DramTermDynStart 10
+@@ -115,10 +123,32 @@ typedef struct _sDCTStruct
+ u8 DctTrain; /* Current DCT being trained */
+ u8 CurrDct; /* Current DCT number (0 or 1) */
+ u8 DctCSPresent; /* Current DCT CS mapping */
++ int32_t WLSeedGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Gross Delay */
++ /* per byte Lane Per Logical DIMM*/
++ int32_t WLSeedFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Fine Delay */
++ /* per byte Lane Per Logical DIMM*/
++ int32_t WLSeedPreGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Pre-Gross Delay */
++ /* per byte Lane Per Logical DIMM*/
+ u8 WLGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Gross Delay */
+ /* per byte Lane Per Logical DIMM*/
+ u8 WLFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Fine Delay */
+ /* per byte Lane Per Logical DIMM*/
++ u8 WLGrossDelayFirstPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* First-Pass Write Levelization Gross Delay */
++ /* per byte Lane Per Logical DIMM*/
++ u8 WLFineDelayFirstPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* First-Pass Write Levelization Fine Delay */
++ /* per byte Lane Per Logical DIMM*/
++ u8 WLGrossDelayPrevPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* Previous Pass Write Levelization Gross Delay */
++ /* per byte Lane Per Logical DIMM*/
++ u8 WLFineDelayPrevPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* Previous Pass Write Levelization Fine Delay */
++ /* per byte Lane Per Logical DIMM*/
++ u8 WLGrossDelayFinalPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* Final-Pass Write Levelization Gross Delay */
++ /* per byte Lane Per Logical DIMM*/
++ u8 WLFineDelayFinalPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* Final-Pass Write Levelization Fine Delay */
++ /* per byte Lane Per Logical DIMM*/
++ int32_t WLCriticalGrossDelayFirstPass;
++ int32_t WLCriticalGrossDelayPrevPass;
++ int32_t WLCriticalGrossDelayFinalPass;
++ uint16_t WLPrevMemclkFreq;
+ u16 RegMan1Present;
+ u8 DimmPresent[MAX_TOTAL_DIMMS];/* Indicates which DIMMs are present */
+ /* from Total Number of DIMMs(per Node)*/
+@@ -132,7 +162,7 @@ typedef struct _sDCTStruct
+ /* per byte lane */
+ u8 MaxDimmsInstalled; /* Max Dimms Installed for current DCT */
+ u8 DimmRanks[MAX_TOTAL_DIMMS]; /* Total Number of Ranks(per Dimm) */
+- u32 LogicalCPUID;
++ uint64_t LogicalCPUID;
+ u8 WLPass;
+ } sDCTStruct;
+
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/s3utils.c b/src/northbridge/amd/amdmct/mct_ddr3/s3utils.c
+index c9bcac1..aa23951 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/s3utils.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/s3utils.c
+@@ -18,6 +18,7 @@
+ */
+
+ #include <string.h>
++#include <arch/cpu.h>
+ #include <arch/acpi.h>
+ #include <cpu/x86/msr.h>
+ #include <device/device.h>
+@@ -32,6 +33,23 @@
+
+ #define S3NV_FILE_NAME "s3nv"
+
++#ifdef __RAMSTAGE__
++static inline uint8_t is_fam15h(void)
++{
++ uint8_t fam15h = 0;
++ uint32_t family;
++
++ family = cpuid_eax(0x80000001);
++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
++
++ if (family >= 0x6f)
++ /* Family 15h or later */
++ fam15h = 1;
++
++ return fam15h;
++}
++#endif
++
+ static ssize_t get_s3nv_file_offset(void);
+
+ ssize_t get_s3nv_file_offset(void)
+@@ -47,6 +65,28 @@ ssize_t get_s3nv_file_offset(void)
+ return s3nv_region.region.offset;
+ }
+
++static uint32_t read_config32_dct(device_t dev, uint8_t node, uint8_t dct, uint32_t reg) {
++ if (is_fam15h()) {
++ uint32_t dword;
++#ifdef __PRE_RAM__
++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1);
++#else
++ device_t dev_fn1 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 1));
++#endif
++
++ /* Select DCT */
++ dword = pci_read_config32(dev_fn1, 0x10c);
++ dword &= ~0x1;
++ dword |= (dct & 0x1);
++ pci_write_config32(dev_fn1, 0x10c, dword);
++ } else {
++ /* Apply offset */
++ reg += dct * 0x100;
++ }
++
++ return pci_read_config32(dev, reg);
++}
++
+ static uint32_t read_amd_dct_index_register(device_t dev, uint32_t index_ctl_reg, uint32_t index)
+ {
+ uint32_t dword;
+@@ -61,12 +101,54 @@ static uint32_t read_amd_dct_index_register(device_t dev, uint32_t index_ctl_reg
+ return dword;
+ }
+
++static uint32_t read_amd_dct_index_register_dct(device_t dev, uint8_t node, uint8_t dct, uint32_t index_ctl_reg, uint32_t index)
++{
++ if (is_fam15h()) {
++ uint32_t dword;
++#ifdef __PRE_RAM__
++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1);
++#else
++ device_t dev_fn1 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 1));
++#endif
++
++ /* Select DCT */
++ dword = pci_read_config32(dev_fn1, 0x10c);
++ dword &= ~0x1;
++ dword |= (dct & 0x1);
++ pci_write_config32(dev_fn1, 0x10c, dword);
++ } else {
++ /* Apply offset */
++ index_ctl_reg += dct * 0x100;
++ }
++
++ return read_amd_dct_index_register(dev, index_ctl_reg, index);
++}
++
+ #ifdef __RAMSTAGE__
+ static uint64_t rdmsr_uint64_t(unsigned long index) {
+ msr_t msr = rdmsr(index);
+ return (((uint64_t)msr.hi) << 32) | ((uint64_t)msr.lo);
+ }
+
++static uint32_t read_config32_dct_nbpstate(device_t dev, uint8_t node, uint8_t dct, uint8_t nb_pstate, uint32_t reg) {
++ uint32_t dword;
++ device_t dev_fn1 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 1));
++
++ /* Select DCT */
++ dword = pci_read_config32(dev_fn1, 0x10c);
++ dword &= ~0x1;
++ dword |= (dct & 0x1);
++ pci_write_config32(dev_fn1, 0x10c, dword);
++
++ /* Select NB Pstate index */
++ dword = pci_read_config32(dev_fn1, 0x10c);
++ dword &= ~(0x3 << 4);
++ dword |= (nb_pstate & 0x3) << 4;
++ pci_write_config32(dev_fn1, 0x10c, dword);
++
++ return pci_read_config32(dev, reg);
++}
++
+ void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_data)
+ {
+ uint8_t i;
+@@ -82,7 +164,8 @@ void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_da
+ device_t dev_fn1 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 1));
+ device_t dev_fn2 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 2));
+ device_t dev_fn3 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 3));
+- if ((!dev_fn1) || (!dev_fn2) || (!dev_fn3)) {
++ /* Test for node presence */
++ if ((!dev_fn1) || (pci_read_config32(dev_fn1, PCI_VENDOR_ID) == 0xffffffff)) {
+ persistent_data->node[node].node_present = 0;
+ continue;
+ }
+@@ -95,22 +178,22 @@ void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_da
+ data->f2x110 = pci_read_config32(dev_fn2, 0x110);
+
+ /* Stage 2 */
+- data->f1x40 = pci_read_config32(dev_fn1, 0x40 + (0x100 * channel));
+- data->f1x44 = pci_read_config32(dev_fn1, 0x44 + (0x100 * channel));
+- data->f1x48 = pci_read_config32(dev_fn1, 0x48 + (0x100 * channel));
+- data->f1x4c = pci_read_config32(dev_fn1, 0x4c + (0x100 * channel));
+- data->f1x50 = pci_read_config32(dev_fn1, 0x50 + (0x100 * channel));
+- data->f1x54 = pci_read_config32(dev_fn1, 0x54 + (0x100 * channel));
+- data->f1x58 = pci_read_config32(dev_fn1, 0x58 + (0x100 * channel));
+- data->f1x5c = pci_read_config32(dev_fn1, 0x5c + (0x100 * channel));
+- data->f1x60 = pci_read_config32(dev_fn1, 0x60 + (0x100 * channel));
+- data->f1x64 = pci_read_config32(dev_fn1, 0x64 + (0x100 * channel));
+- data->f1x68 = pci_read_config32(dev_fn1, 0x68 + (0x100 * channel));
+- data->f1x6c = pci_read_config32(dev_fn1, 0x6c + (0x100 * channel));
+- data->f1x70 = pci_read_config32(dev_fn1, 0x70 + (0x100 * channel));
+- data->f1x74 = pci_read_config32(dev_fn1, 0x74 + (0x100 * channel));
+- data->f1x78 = pci_read_config32(dev_fn1, 0x78 + (0x100 * channel));
+- data->f1x7c = pci_read_config32(dev_fn1, 0x7c + (0x100 * channel));
++ data->f1x40 = read_config32_dct(dev_fn1, node, channel, 0x40);
++ data->f1x44 = read_config32_dct(dev_fn1, node, channel, 0x44);
++ data->f1x48 = read_config32_dct(dev_fn1, node, channel, 0x48);
++ data->f1x4c = read_config32_dct(dev_fn1, node, channel, 0x4c);
++ data->f1x50 = read_config32_dct(dev_fn1, node, channel, 0x50);
++ data->f1x54 = read_config32_dct(dev_fn1, node, channel, 0x54);
++ data->f1x58 = read_config32_dct(dev_fn1, node, channel, 0x58);
++ data->f1x5c = read_config32_dct(dev_fn1, node, channel, 0x5c);
++ data->f1x60 = read_config32_dct(dev_fn1, node, channel, 0x60);
++ data->f1x64 = read_config32_dct(dev_fn1, node, channel, 0x64);
++ data->f1x68 = read_config32_dct(dev_fn1, node, channel, 0x68);
++ data->f1x6c = read_config32_dct(dev_fn1, node, channel, 0x6c);
++ data->f1x70 = read_config32_dct(dev_fn1, node, channel, 0x70);
++ data->f1x74 = read_config32_dct(dev_fn1, node, channel, 0x74);
++ data->f1x78 = read_config32_dct(dev_fn1, node, channel, 0x78);
++ data->f1x7c = read_config32_dct(dev_fn1, node, channel, 0x7c);
+ data->f1xf0 = pci_read_config32(dev_fn1, 0xf0);
+ data->f1x120 = pci_read_config32(dev_fn1, 0x120);
+ data->f1x124 = pci_read_config32(dev_fn1, 0x124);
+@@ -134,75 +217,144 @@ void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_da
+ data->msrc001001f = rdmsr_uint64_t(0xc001001f);
+
+ /* Stage 3 */
+- data->f2x40 = pci_read_config32(dev_fn2, 0x40 + (0x100 * channel));
+- data->f2x44 = pci_read_config32(dev_fn2, 0x44 + (0x100 * channel));
+- data->f2x48 = pci_read_config32(dev_fn2, 0x48 + (0x100 * channel));
+- data->f2x4c = pci_read_config32(dev_fn2, 0x4c + (0x100 * channel));
+- data->f2x50 = pci_read_config32(dev_fn2, 0x50 + (0x100 * channel));
+- data->f2x54 = pci_read_config32(dev_fn2, 0x54 + (0x100 * channel));
+- data->f2x58 = pci_read_config32(dev_fn2, 0x58 + (0x100 * channel));
+- data->f2x5c = pci_read_config32(dev_fn2, 0x5c + (0x100 * channel));
+- data->f2x60 = pci_read_config32(dev_fn2, 0x60 + (0x100 * channel));
+- data->f2x64 = pci_read_config32(dev_fn2, 0x64 + (0x100 * channel));
+- data->f2x68 = pci_read_config32(dev_fn2, 0x68 + (0x100 * channel));
+- data->f2x6c = pci_read_config32(dev_fn2, 0x6c + (0x100 * channel));
+- data->f2x78 = pci_read_config32(dev_fn2, 0x78 + (0x100 * channel));
+- data->f2x7c = pci_read_config32(dev_fn2, 0x7c + (0x100 * channel));
+- data->f2x80 = pci_read_config32(dev_fn2, 0x80 + (0x100 * channel));
+- data->f2x84 = pci_read_config32(dev_fn2, 0x84 + (0x100 * channel));
+- data->f2x88 = pci_read_config32(dev_fn2, 0x88 + (0x100 * channel));
+- data->f2x8c = pci_read_config32(dev_fn2, 0x8c + (0x100 * channel));
+- data->f2x90 = pci_read_config32(dev_fn2, 0x90 + (0x100 * channel));
+- data->f2xa4 = pci_read_config32(dev_fn2, 0xa4 + (0x100 * channel));
+- data->f2xa8 = pci_read_config32(dev_fn2, 0xa8 + (0x100 * channel));
++ data->f2x40 = read_config32_dct(dev_fn2, node, channel, 0x40);
++ data->f2x44 = read_config32_dct(dev_fn2, node, channel, 0x44);
++ data->f2x48 = read_config32_dct(dev_fn2, node, channel, 0x48);
++ data->f2x4c = read_config32_dct(dev_fn2, node, channel, 0x4c);
++ data->f2x50 = read_config32_dct(dev_fn2, node, channel, 0x50);
++ data->f2x54 = read_config32_dct(dev_fn2, node, channel, 0x54);
++ data->f2x58 = read_config32_dct(dev_fn2, node, channel, 0x58);
++ data->f2x5c = read_config32_dct(dev_fn2, node, channel, 0x5c);
++ data->f2x60 = read_config32_dct(dev_fn2, node, channel, 0x60);
++ data->f2x64 = read_config32_dct(dev_fn2, node, channel, 0x64);
++ data->f2x68 = read_config32_dct(dev_fn2, node, channel, 0x68);
++ data->f2x6c = read_config32_dct(dev_fn2, node, channel, 0x6c);
++ data->f2x78 = read_config32_dct(dev_fn2, node, channel, 0x78);
++ data->f2x7c = read_config32_dct(dev_fn2, node, channel, 0x7c);
++ data->f2x80 = read_config32_dct(dev_fn2, node, channel, 0x80);
++ data->f2x84 = read_config32_dct(dev_fn2, node, channel, 0x84);
++ data->f2x88 = read_config32_dct(dev_fn2, node, channel, 0x88);
++ data->f2x8c = read_config32_dct(dev_fn2, node, channel, 0x8c);
++ data->f2x90 = read_config32_dct(dev_fn2, node, channel, 0x90);
++ data->f2xa4 = read_config32_dct(dev_fn2, node, channel, 0xa4);
++ data->f2xa8 = read_config32_dct(dev_fn2, node, channel, 0xa8);
++
++ /* Family 15h-specific configuration */
++ if (is_fam15h()) {
++ data->f2x200 = read_config32_dct(dev_fn2, node, channel, 0x200);
++ data->f2x204 = read_config32_dct(dev_fn2, node, channel, 0x204);
++ data->f2x208 = read_config32_dct(dev_fn2, node, channel, 0x208);
++ data->f2x20c = read_config32_dct(dev_fn2, node, channel, 0x20c);
++ for (i=0; i<4; i++)
++ data->f2x210[i] = read_config32_dct_nbpstate(dev_fn2, node, channel, i, 0x210);
++ data->f2x214 = read_config32_dct(dev_fn2, node, channel, 0x214);
++ data->f2x218 = read_config32_dct(dev_fn2, node, channel, 0x218);
++ data->f2x21c = read_config32_dct(dev_fn2, node, channel, 0x21c);
++ data->f2x22c = read_config32_dct(dev_fn2, node, channel, 0x22c);
++ data->f2x230 = read_config32_dct(dev_fn2, node, channel, 0x230);
++ data->f2x234 = read_config32_dct(dev_fn2, node, channel, 0x234);
++ data->f2x238 = read_config32_dct(dev_fn2, node, channel, 0x238);
++ data->f2x23c = read_config32_dct(dev_fn2, node, channel, 0x23c);
++ data->f2x240 = read_config32_dct(dev_fn2, node, channel, 0x240);
++
++ data->f2x9cx0d0fe003 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fe003);
++ data->f2x9cx0d0fe013 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fe013);
++ for (i=0; i<9; i++)
++ data->f2x9cx0d0f0_8_0_1f[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f001f | (i << 8));
++ data->f2x9cx0d0f201f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f201f);
++ data->f2x9cx0d0f211f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f211f);
++ data->f2x9cx0d0f221f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f221f);
++ data->f2x9cx0d0f801f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f801f);
++ data->f2x9cx0d0f811f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f811f);
++ data->f2x9cx0d0f821f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f821f);
++ data->f2x9cx0d0fc01f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc01f);
++ data->f2x9cx0d0fc11f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc11f);
++ data->f2x9cx0d0fc21f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc21f);
++ data->f2x9cx0d0f4009 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f4009);
++ for (i=0; i<9; i++)
++ data->f2x9cx0d0f0_8_0_02[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0002 | (i << 8));
++ for (i=0; i<9; i++)
++ data->f2x9cx0d0f0_8_0_06[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0006 | (i << 8));
++ for (i=0; i<9; i++)
++ data->f2x9cx0d0f0_8_0_0a[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f000a | (i << 8));
++
++ data->f2x9cx0d0f2002 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2002);
++ data->f2x9cx0d0f2102 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2102);
++ data->f2x9cx0d0f2202 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2202);
++ data->f2x9cx0d0f8002 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8002);
++ data->f2x9cx0d0f8006 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8006);
++ data->f2x9cx0d0f800a = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f800a);
++ data->f2x9cx0d0f8102 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8102);
++ data->f2x9cx0d0f8106 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8106);
++ data->f2x9cx0d0f810a = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f810a);
++ data->f2x9cx0d0fc002 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc002);
++ data->f2x9cx0d0fc006 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc006);
++ data->f2x9cx0d0fc00a = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc00a);
++ data->f2x9cx0d0fc00e = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc00e);
++ data->f2x9cx0d0fc012 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc012);
++
++ data->f2x9cx0d0f2031 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2031);
++ data->f2x9cx0d0f2131 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2131);
++ data->f2x9cx0d0f2231 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2231);
++ data->f2x9cx0d0f8031 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8031);
++ data->f2x9cx0d0f8131 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8131);
++ data->f2x9cx0d0f8231 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8231);
++ data->f2x9cx0d0fc031 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc031);
++ data->f2x9cx0d0fc131 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc131);
++ data->f2x9cx0d0fc231 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc231);
++ for (i=0; i<9; i++)
++ data->f2x9cx0d0f0_0_f_31[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0031 | (i << 8));
++
++ data->f2x9cx0d0f8021 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8021);
++ }
+
+ /* Stage 4 */
+- data->f2x94 = pci_read_config32(dev_fn2, 0x94 + (0x100 * channel));
++ data->f2x94 = read_config32_dct(dev_fn2, node, channel, 0x94);
+
+ /* Stage 6 */
+ for (i=0; i<9; i++)
+ for (j=0; j<3; j++)
+- data->f2x9cx0d0f0_f_8_0_0_8_4_0[i][j] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f0000 | (i << 8) | (j * 4));
+- data->f2x9cx00 = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x00);
+- data->f2x9cx0a = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0a);
+- data->f2x9cx0c = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0c);
++ data->f2x9cx0d0f0_f_8_0_0_8_4_0[i][j] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0000 | (i << 8) | (j * 4));
++ data->f2x9cx00 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x00);
++ data->f2x9cx0a = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0a);
++ data->f2x9cx0c = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0c);
+
+ /* Stage 7 */
+- data->f2x9cx04 = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x04);
++ data->f2x9cx04 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x04);
+
+ /* Stage 9 */
+- data->f2x9cx0d0fe006 = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0fe006);
+- data->f2x9cx0d0fe007 = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0fe007);
++ data->f2x9cx0d0fe006 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fe006);
++ data->f2x9cx0d0fe007 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fe007);
+
+ /* Stage 10 */
+ for (i=0; i<12; i++)
+- data->f2x9cx10[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x10 + i);
++ data->f2x9cx10[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x10 + i);
+ for (i=0; i<12; i++)
+- data->f2x9cx20[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x20 + i);
++ data->f2x9cx20[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x20 + i);
+ for (i=0; i<4; i++)
+ for (j=0; j<3; j++)
+- data->f2x9cx3_0_0_3_1[i][j] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), (0x01 + i) + (0x100 * j));
++ data->f2x9cx3_0_0_3_1[i][j] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, (0x01 + i) + (0x100 * j));
+ for (i=0; i<4; i++)
+ for (j=0; j<3; j++)
+- data->f2x9cx3_0_0_7_5[i][j] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), (0x05 + i) + (0x100 * j));
+- data->f2x9cx0d = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d);
++ data->f2x9cx3_0_0_7_5[i][j] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, (0x05 + i) + (0x100 * j));
++ data->f2x9cx0d = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d);
+ for (i=0; i<9; i++)
+- data->f2x9cx0d0f0_f_0_13[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f0013 | (i << 8));
++ data->f2x9cx0d0f0_f_0_13[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0013 | (i << 8));
+ for (i=0; i<9; i++)
+- data->f2x9cx0d0f0_f_0_30[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f0030 | (i << 8));
++ data->f2x9cx0d0f0_f_0_30[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0030 | (i << 8));
+ for (i=0; i<4; i++)
+- data->f2x9cx0d0f2_f_0_30[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f2030 | (i << 8));
++ data->f2x9cx0d0f2_f_0_30[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2030 | (i << 8));
+ for (i=0; i<2; i++)
+ for (j=0; j<3; j++)
+- data->f2x9cx0d0f8_8_4_0[i][j] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f0000 | (i << 8) | (j * 4));
+- data->f2x9cx0d0f812f = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f812f);
++ data->f2x9cx0d0f8_8_4_0[i][j] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0000 | (i << 8) | (j * 4));
++ data->f2x9cx0d0f812f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f812f);
+
+ /* Stage 11 */
+ if (IS_ENABLED(CONFIG_DIMM_DDR3)) {
+ for (i=0; i<12; i++)
+- data->f2x9cx30[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x30 + i);
++ data->f2x9cx30[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x30 + i);
+ for (i=0; i<12; i++)
+- data->f2x9cx40[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x40 + i);
++ data->f2x9cx40[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x40 + i);
+ }
+
+ /* Other */
+@@ -212,6 +364,43 @@ void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_da
+ }
+ }
+ #else
++static void write_config32_dct(device_t dev, uint8_t node, uint8_t dct, uint32_t reg, uint32_t value) {
++ if (is_fam15h()) {
++ uint32_t dword;
++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1);
++
++ /* Select DCT */
++ dword = pci_read_config32(dev_fn1, 0x10c);
++ dword &= ~0x1;
++ dword |= (dct & 0x1);
++ pci_write_config32(dev_fn1, 0x10c, dword);
++ } else {
++ /* Apply offset */
++ reg += dct * 0x100;
++ }
++
++ pci_write_config32(dev, reg, value);
++}
++
++static void write_config32_dct_nbpstate(device_t dev, uint8_t node, uint8_t dct, uint8_t nb_pstate, uint32_t reg, uint32_t value) {
++ uint32_t dword;
++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1);
++
++ /* Select DCT */
++ dword = pci_read_config32(dev_fn1, 0x10c);
++ dword &= ~0x1;
++ dword |= (dct & 0x1);
++ pci_write_config32(dev_fn1, 0x10c, dword);
++
++ /* Select NB Pstate index */
++ dword = pci_read_config32(dev_fn1, 0x10c);
++ dword &= ~(0x3 << 4);
++ dword |= (nb_pstate & 0x3) << 4;
++ pci_write_config32(dev_fn1, 0x10c, dword);
++
++ pci_write_config32(dev, reg, value);
++}
++
+ static void write_amd_dct_index_register(device_t dev, uint32_t index_ctl_reg, uint32_t index, uint32_t value)
+ {
+ uint32_t dword;
+@@ -223,6 +412,25 @@ static void write_amd_dct_index_register(device_t dev, uint32_t index_ctl_reg, u
+ dword = pci_read_config32(dev, index_ctl_reg);
+ } while (!(dword & (1 << 31)));
+ }
++
++static void write_amd_dct_index_register_dct(device_t dev, uint8_t node, uint8_t dct, uint32_t index_ctl_reg, uint32_t index, uint32_t value)
++{
++ if (is_fam15h()) {
++ uint32_t dword;
++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1);
++
++ /* Select DCT */
++ dword = pci_read_config32(dev_fn1, 0x10c);
++ dword &= ~0x1;
++ dword |= (dct & 0x1);
++ pci_write_config32(dev_fn1, 0x10c, dword);
++ } else {
++ /* Apply offset */
++ index_ctl_reg += dct * 0x100;
++ }
++
++ return write_amd_dct_index_register(dev, index_ctl_reg, index, value);
++}
+ #endif
+
+ #ifdef __PRE_RAM__
+@@ -262,31 +470,31 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+ if (!persistent_data->node[node].node_present)
+ continue;
+
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x40 + (0x100 * channel), data->f1x40);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x44 + (0x100 * channel), data->f1x44);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x48 + (0x100 * channel), data->f1x48);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x4c + (0x100 * channel), data->f1x4c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x50 + (0x100 * channel), data->f1x50);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x54 + (0x100 * channel), data->f1x54);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x58 + (0x100 * channel), data->f1x58);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x5c + (0x100 * channel), data->f1x5c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x60 + (0x100 * channel), data->f1x60);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x64 + (0x100 * channel), data->f1x64);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x68 + (0x100 * channel), data->f1x68);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x6c + (0x100 * channel), data->f1x6c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x70 + (0x100 * channel), data->f1x70);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x74 + (0x100 * channel), data->f1x74);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x78 + (0x100 * channel), data->f1x78);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x7c + (0x100 * channel), data->f1x7c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0xf0 + (0x100 * channel), data->f1xf0);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x120 + (0x100 * channel), data->f1x120);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x124 + (0x100 * channel), data->f1x124);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x10c + (0x100 * channel), data->f2x10c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x114 + (0x100 * channel), data->f2x114);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x118 + (0x100 * channel), data->f2x118);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x11c + (0x100 * channel), data->f2x11c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x1b0 + (0x100 * channel), data->f2x1b0);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 3), 0x44 + (0x100 * channel), data->f3x44);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x40, data->f1x40);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x44, data->f1x44);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x48, data->f1x48);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x4c, data->f1x4c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x50, data->f1x50);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x54, data->f1x54);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x58, data->f1x58);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x5c, data->f1x5c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x60, data->f1x60);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x64, data->f1x64);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x68, data->f1x68);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x6c, data->f1x6c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x70, data->f1x70);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x74, data->f1x74);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x78, data->f1x78);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x7c, data->f1x7c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0xf0, data->f1xf0);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x120, data->f1x120);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x124, data->f1x124);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x10c, data->f2x10c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x114, data->f2x114);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x118, data->f2x118);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x11c, data->f2x11c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x1b0, data->f2x1b0);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 3), node, channel, 0x44, data->f3x44);
+ for (i=0; i<16; i++) {
+ wrmsr_uint64_t(0x00000200 | i, data->msr0000020[i]);
+ }
+@@ -313,31 +521,97 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+ if (!persistent_data->node[node].node_present)
+ continue;
+
+- ganged = !!(data->f2x110 & 0x10);
++ if (is_fam15h())
++ ganged = 0;
++ else
++ ganged = !!(data->f2x110 & 0x10);
+ if ((ganged == 1) && (channel > 0))
+ continue;
+
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x40 + (0x100 * channel), data->f2x40);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x44 + (0x100 * channel), data->f2x44);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x48 + (0x100 * channel), data->f2x48);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x4c + (0x100 * channel), data->f2x4c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x50 + (0x100 * channel), data->f2x50);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x54 + (0x100 * channel), data->f2x54);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x58 + (0x100 * channel), data->f2x58);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x5c + (0x100 * channel), data->f2x5c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x60 + (0x100 * channel), data->f2x60);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x64 + (0x100 * channel), data->f2x64);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x68 + (0x100 * channel), data->f2x68);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x6c + (0x100 * channel), data->f2x6c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x78 + (0x100 * channel), data->f2x78);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x7c + (0x100 * channel), data->f2x7c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x80 + (0x100 * channel), data->f2x80);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x84 + (0x100 * channel), data->f2x84);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x88 + (0x100 * channel), data->f2x88);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x8c + (0x100 * channel), data->f2x8c);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x90 + (0x100 * channel), data->f2x90);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0xa4 + (0x100 * channel), data->f2xa4);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0xa8 + (0x100 * channel), data->f2xa8);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x40, data->f2x40);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x44, data->f2x44);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x48, data->f2x48);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x4c, data->f2x4c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x50, data->f2x50);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x54, data->f2x54);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x58, data->f2x58);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x5c, data->f2x5c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x60, data->f2x60);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x64, data->f2x64);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x68, data->f2x68);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x6c, data->f2x6c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x78, data->f2x78);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x7c, data->f2x7c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x80, data->f2x80);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x84, data->f2x84);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x88, data->f2x88);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x8c, data->f2x8c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x90, data->f2x90);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0xa4, data->f2xa4);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0xa8, data->f2xa8);
++ }
++ }
++
++ /* Family 15h-specific configuration */
++ if (is_fam15h()) {
++ for (node = 0; node < MAX_NODES_SUPPORTED; node++) {
++ for (channel = 0; channel < 2; channel++) {
++ struct amd_s3_persistent_mct_channel_data* data = &persistent_data->node[node].channel[channel];
++ if (!persistent_data->node[node].node_present)
++ continue;
++
++ /* Initialize DCT */
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0000000b, 0x80000000);
++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe013);
++ dword &= ~0xffff;
++ dword |= 0x118;
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe013, dword);
++
++ /* Restore values */
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x200, data->f2x200);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x204, data->f2x204);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x208, data->f2x208);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x20c, data->f2x20c);
++ for (i=0; i<4; i++)
++ write_config32_dct_nbpstate(PCI_DEV(0, 0x18 + node, 2), node, channel, i, 0x210, data->f2x210[i]);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x214, data->f2x214);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x218, data->f2x218);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x21c, data->f2x21c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x22c, data->f2x22c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x230, data->f2x230);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x234, data->f2x234);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x238, data->f2x238);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x23c, data->f2x23c);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x240, data->f2x240);
++
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe013, data->f2x9cx0d0fe013);
++ for (i=0; i<9; i++)
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f001f | (i << 8), data->f2x9cx0d0f0_8_0_1f[i]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f201f, data->f2x9cx0d0f201f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f211f, data->f2x9cx0d0f211f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f221f, data->f2x9cx0d0f221f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f801f, data->f2x9cx0d0f801f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f811f, data->f2x9cx0d0f811f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f821f, data->f2x9cx0d0f821f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc01f, data->f2x9cx0d0fc01f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc11f, data->f2x9cx0d0fc11f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc21f, data->f2x9cx0d0fc21f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f4009, data->f2x9cx0d0f4009);
++
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2031, data->f2x9cx0d0f2031);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2131, data->f2x9cx0d0f2131);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2231, data->f2x9cx0d0f2231);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8031, data->f2x9cx0d0f8031);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8131, data->f2x9cx0d0f8131);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8231, data->f2x9cx0d0f8231);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc031, data->f2x9cx0d0fc031);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc131, data->f2x9cx0d0fc131);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc231, data->f2x9cx0d0fc231);
++ for (i=0; i<9; i++)
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0031 | (i << 8), data->f2x9cx0d0f0_0_f_31[i]);
++
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8021, data->f2x9cx0d0f8021);
++ }
+ }
+ }
+
+@@ -348,33 +622,44 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+ if (!persistent_data->node[node].node_present)
+ continue;
+
+- ganged = !!(data->f2x110 & 0x10);
++ if (is_fam15h())
++ ganged = 0;
++ else
++ ganged = !!(data->f2x110 & 0x10);
+ if ((ganged == 1) && (channel > 0))
+ continue;
+
+- /* Disable PHY auto-compensation engine */
+- dword = read_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08);
+- if (!(dword & (1 << 30))) {
+- dword |= (1 << 30);
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08, dword);
+-
+- /* Wait for 5us */
+- mct_Wait(100);
++ if (is_fam15h()) {
++ /* Program PllLockTime = 0x190 */
++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006);
++ dword &= ~0xffff;
++ dword |= 0x190;
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006, dword);
++
++ /* Program MemClkFreqVal = 0 */
++ dword = read_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x94);
++ dword &= (0x1 << 7);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x94, dword);
++
++			/* Restore DRAM Address/Timing Control Register */
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x04, data->f2x9cx04);
++ } else {
++ /* Disable PHY auto-compensation engine */
++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x08);
++ if (!(dword & (1 << 30))) {
++ dword |= (1 << 30);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x08, dword);
++
++ /* Wait for 5us */
++ mct_Wait(100);
++ }
+ }
+
+ /* Restore DRAM Configuration High Register */
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x94 + (0x100 * channel), data->f2x94);
+-
+- /* Enable PHY auto-compensation engine */
+- dword = read_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08);
+- dword &= ~(1 << 30);
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08, dword);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x94, data->f2x94);
+ }
+ }
+
+- /* Wait for 750us */
+- mct_Wait(15000);
+-
+ /* Stage 5 */
+ for (node = 0; node < MAX_NODES_SUPPORTED; node++) {
+ for (channel = 0; channel < 2; channel++) {
+@@ -382,17 +667,40 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+ if (!persistent_data->node[node].node_present)
+ continue;
+
+- ganged = !!(data->f2x110 & 0x10);
++ if (is_fam15h())
++ ganged = 0;
++ else
++ ganged = !!(data->f2x110 & 0x10);
+ if ((ganged == 1) && (channel > 0))
+ continue;
+
++ dct_enabled = !(data->f2x94 & (1 << 14));
++ if (!dct_enabled)
++ continue;
++
+ /* Wait for any pending PHY frequency changes to complete */
+ do {
+- dword = read_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08);
++ dword = read_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x94);
+ } while (dword & (1 << 21));
++
++ if (is_fam15h()) {
++ /* Program PllLockTime = 0xf */
++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006);
++ dword &= ~0xffff;
++ dword |= 0xf;
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006, dword);
++ } else {
++ /* Enable PHY auto-compensation engine */
++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x08);
++ dword &= ~(1 << 30);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x08, dword);
++ }
+ }
+ }
+
++ /* Wait for 750us */
++ mct_Wait(15000);
++
+ /* Stage 6 */
+ for (node = 0; node < MAX_NODES_SUPPORTED; node++) {
+ for (channel = 0; channel < 2; channel++) {
+@@ -402,10 +710,49 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+
+ for (i=0; i<9; i++)
+ for (j=0; j<3; j++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f0000 | (i << 8) | (j * 4), data->f2x9cx0d0f0_f_8_0_0_8_4_0[i][j]);
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x00, data->f2x9cx00);
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0a, data->f2x9cx0a);
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0c, data->f2x9cx0c);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0000 | (i << 8) | (j * 4), data->f2x9cx0d0f0_f_8_0_0_8_4_0[i][j]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x00, data->f2x9cx00);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0a, data->f2x9cx0a);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0c, data->f2x9cx0c);
++ }
++ }
++
++ /* Family 15h-specific configuration */
++ if (is_fam15h()) {
++ for (node = 0; node < MAX_NODES_SUPPORTED; node++) {
++ for (channel = 0; channel < 2; channel++) {
++ struct amd_s3_persistent_mct_channel_data* data = &persistent_data->node[node].channel[channel];
++ if (!persistent_data->node[node].node_present)
++ continue;
++
++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe003);
++ dword |= (0x3 << 13); /* DisAutoComp, DisablePredriverCal = 1 */
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe003, dword);
++
++ for (i=0; i<9; i++)
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0006 | (i << 8), data->f2x9cx0d0f0_8_0_06[i]);
++ for (i=0; i<9; i++)
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f000a | (i << 8), data->f2x9cx0d0f0_8_0_0a[i]);
++ for (i=0; i<9; i++)
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0002 | (i << 8), (0x8000 | data->f2x9cx0d0f0_8_0_02[i]));
++
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8006, data->f2x9cx0d0f8006);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f800a, data->f2x9cx0d0f800a);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8106, data->f2x9cx0d0f8106);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f810a, data->f2x9cx0d0f810a);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc006, data->f2x9cx0d0fc006);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc00a, data->f2x9cx0d0fc00a);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc00e, data->f2x9cx0d0fc00e);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc012, data->f2x9cx0d0fc012);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8002, (0x8000 | data->f2x9cx0d0f8002));
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8102, (0x8000 | data->f2x9cx0d0f8102));
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc002, (0x8000 | data->f2x9cx0d0fc002));
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2002, (0x8000 | data->f2x9cx0d0f2002));
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2102, (0x8000 | data->f2x9cx0d0f2102));
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2202, (0x8000 | data->f2x9cx0d0f2202));
++
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe003, data->f2x9cx0d0fe003);
++ }
+ }
+ }
+
+@@ -416,11 +763,15 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+ if (!persistent_data->node[node].node_present)
+ continue;
+
+- ganged = !!(data->f2x110 & 0x10);
++ if (is_fam15h())
++ ganged = 0;
++ else
++ ganged = !!(data->f2x110 & 0x10);
+ if ((ganged == 1) && (channel > 0))
+ continue;
+
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x04, data->f2x9cx04);
++ if (!is_fam15h())
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x04, data->f2x9cx04);
+ }
+ }
+
+@@ -435,16 +786,19 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+ if (!dct_enabled)
+ continue;
+
+- ganged = !!(data->f2x110 & 0x10);
++ if (is_fam15h())
++ ganged = 0;
++ else
++ ganged = !!(data->f2x110 & 0x10);
+ if ((ganged == 1) && (channel > 0))
+ continue;
+
+ printk(BIOS_SPEW, "Taking DIMMs out of self refresh node: %d channel: %d\n", node, channel);
+
+ /* Exit self refresh mode */
+- dword = pci_read_config32(PCI_DEV(0, 0x18 + node, 2), 0x90 + (0x100 * channel));
++ dword = read_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x90);
+ dword |= (1 << 1);
+- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x90 + (0x100 * channel), dword);
++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x90, dword);
+ }
+ }
+
+@@ -463,12 +817,12 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+
+ /* Wait for transition from self refresh mode to complete */
+ do {
+- dword = pci_read_config32(PCI_DEV(0, 0x18 + node, 2), 0x90 + (0x100 * channel));
++ dword = read_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x90);
+ } while (dword & (1 << 1));
+
+ /* Restore registers */
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0fe006, data->f2x9cx0d0fe006);
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0fe007, data->f2x9cx0d0fe007);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006, data->f2x9cx0d0fe006);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe007, data->f2x9cx0d0fe007);
+ }
+ }
+
+@@ -480,26 +834,26 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+ continue;
+
+ for (i=0; i<12; i++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x10 + i, data->f2x9cx10[i]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x10 + i, data->f2x9cx10[i]);
+ for (i=0; i<12; i++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x20 + i, data->f2x9cx20[i]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x20 + i, data->f2x9cx20[i]);
+ for (i=0; i<4; i++)
+ for (j=0; j<3; j++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), (0x01 + i) + (0x100 * j), data->f2x9cx3_0_0_3_1[i][j]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, (0x01 + i) + (0x100 * j), data->f2x9cx3_0_0_3_1[i][j]);
+ for (i=0; i<4; i++)
+ for (j=0; j<3; j++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), (0x05 + i) + (0x100 * j), data->f2x9cx3_0_0_7_5[i][j]);
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d, data->f2x9cx0d);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, (0x05 + i) + (0x100 * j), data->f2x9cx3_0_0_7_5[i][j]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d, data->f2x9cx0d);
+ for (i=0; i<9; i++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f0013 | (i << 8), data->f2x9cx0d0f0_f_0_13[i]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0013 | (i << 8), data->f2x9cx0d0f0_f_0_13[i]);
+ for (i=0; i<9; i++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f0030 | (i << 8), data->f2x9cx0d0f0_f_0_30[i]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0030 | (i << 8), data->f2x9cx0d0f0_f_0_30[i]);
+ for (i=0; i<4; i++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f2030 | (i << 8), data->f2x9cx0d0f2_f_0_30[i]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2030 | (i << 8), data->f2x9cx0d0f2_f_0_30[i]);
+ for (i=0; i<2; i++)
+ for (j=0; j<3; j++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f0000 | (i << 8) | (j * 4), data->f2x9cx0d0f8_8_4_0[i][j]);
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f812f, data->f2x9cx0d0f812f);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0000 | (i << 8) | (j * 4), data->f2x9cx0d0f8_8_4_0[i][j]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f812f, data->f2x9cx0d0f812f);
+ }
+ }
+
+@@ -512,9 +866,9 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste
+ continue;
+
+ for (i=0; i<12; i++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x30 + i, data->f2x9cx30[i]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x30 + i, data->f2x9cx30[i]);
+ for (i=0; i<12; i++)
+- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x40 + i, data->f2x9cx40[i]);
++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x40 + i, data->f2x9cx40[i]);
+ }
+ }
+ }
+diff --git a/src/northbridge/amd/amdmct/wrappers/mcti.h b/src/northbridge/amd/amdmct/wrappers/mcti.h
+index 38e66e1..2aba377 100644
+--- a/src/northbridge/amd/amdmct/wrappers/mcti.h
++++ b/src/northbridge/amd/amdmct/wrappers/mcti.h
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -62,10 +63,15 @@ UPDATE AS NEEDED
+ #endif
+
+ #ifndef MEM_MAX_LOAD_FREQ
+-#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
+- #define MEM_MAX_LOAD_FREQ 800
+-#else
+- #define MEM_MAX_LOAD_FREQ 400
++#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
++ #define MEM_MAX_LOAD_FREQ 933
++ #define MEM_MIN_PLATFORM_FREQ_FAM10 400
++ #define MEM_MIN_PLATFORM_FREQ_FAM15 333
++#else /* AMD_FAM10_DDR2 */
++ #define MEM_MAX_LOAD_FREQ 400
++ #define MEM_MIN_PLATFORM_FREQ_FAM10 200
++ /* DDR2 not available on Family 15h */
++ #define MEM_MIN_PLATFORM_FREQ_FAM15 0
+ #endif
+ #endif
+
+diff --git a/src/northbridge/amd/amdmct/wrappers/mcti_d.c b/src/northbridge/amd/amdmct/wrappers/mcti_d.c
+index 444adc5..9969c4f 100644
+--- a/src/northbridge/amd/amdmct/wrappers/mcti_d.c
++++ b/src/northbridge/amd/amdmct/wrappers/mcti_d.c
+@@ -44,7 +44,7 @@
+ #define MINIMUM_DRAM_BELOW_4G 0x1000000
+
+ static const uint16_t ddr2_limits[4] = {400, 333, 266, 200};
+-static const uint16_t ddr3_limits[4] = {800, 666, 533, 400};
++static const uint16_t ddr3_limits[16] = {933, 800, 666, 533, 400, 333, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ static u16 mctGet_NVbits(u8 index)
+ {
+@@ -81,12 +81,19 @@ static u16 mctGet_NVbits(u8 index)
+ if (get_option(&nvram, "max_mem_clock") == CB_SUCCESS) {
+ int limit = val;
+ if (IS_ENABLED(CONFIG_DIMM_DDR3))
+- limit = ddr3_limits[nvram & 3];
++ limit = ddr3_limits[nvram & 0xf];
+ else if (IS_ENABLED(CONFIG_DIMM_DDR2))
+- limit = ddr2_limits[nvram & 3];
++ limit = ddr2_limits[nvram & 0x3];
+ val = min(limit, val);
+ }
+ break;
++ case NV_MIN_MEMCLK:
++ /* Minimum platform supported memclk */
++ if (is_fam15h())
++ val = MEM_MIN_PLATFORM_FREQ_FAM15;
++ else
++ val = MEM_MIN_PLATFORM_FREQ_FAM10;
++ break;
+ case NV_ECC_CAP:
+ #if SYSTEM_TYPE == SERVER
+ val = 1; /* memory bus ECC capable */
+@@ -254,6 +261,9 @@ static u16 mctGet_NVbits(u8 index)
+ case NV_L2BKScrub:
+ val = 0; /* Disabled - See L2Scrub in BKDG */
+ break;
++ case NV_L3BKScrub:
++ val = 0; /* Disabled - See L3Scrub in BKDG */
++ break;
+ case NV_DCBKScrub:
+ val = 0; /* Disabled - See DcacheScrub in BKDG */
+ break;
+@@ -303,6 +313,9 @@ static void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat)
+ int ch2_count = 0;
+ uint8_t ch1_registered = 0;
+ uint8_t ch2_registered = 0;
++ uint8_t ch1_voltage = 0;
++ uint8_t ch2_voltage = 0;
++ uint8_t highest_rank_count[2];
+ int i;
+ for (i = 0; i < 15; i = i + 2) {
+ if (pDCTstat->DIMMValid & (1 << i))
+@@ -321,8 +334,28 @@ static void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat)
+ printk(BIOS_DEBUG, "mctGet_MaxLoadFreq: Channel 2: %d DIMM(s) detected\n", ch2_count);
+ }
+
++#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
++ uint8_t dimm;
++
++ for (i = 0; i < 15; i = i + 2) {
++ if (pDCTstat->DIMMValid & (1 << i))
++ ch1_voltage |= pDCTstat->DimmConfiguredVoltage[i];
++ if (pDCTstat->DIMMValid & (1 << (i + 1)))
++ ch2_voltage |= pDCTstat->DimmConfiguredVoltage[i + 1];
++ }
++
++ for (i = 0; i < 2; i++) {
++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[i];
++ highest_rank_count[i] = 0x0;
++ for (dimm = 0; dimm < 8; dimm++) {
++ if (pDCTData->DimmRanks[dimm] > highest_rank_count[i])
++ highest_rank_count[i] = pDCTData->DimmRanks[dimm];
++ }
++ }
++#endif
++
+ /* Set limits if needed */
+- pDCTstat->PresetmaxFreq = mct_MaxLoadFreq(max(ch1_count, ch2_count), (ch1_registered || ch2_registered), pDCTstat->PresetmaxFreq);
++ pDCTstat->PresetmaxFreq = mct_MaxLoadFreq(max(ch1_count, ch2_count), max(highest_rank_count[0], highest_rank_count[1]), (ch1_registered || ch2_registered), (ch1_voltage | ch2_voltage), pDCTstat->PresetmaxFreq);
+ }
+
+ #ifdef UNUSED_CODE
+@@ -486,7 +519,7 @@ static void mctHookAfterAnyTraining(void)
+ {
+ }
+
+-static u32 mctGetLogicalCPUID_D(u8 node)
++static uint64_t mctGetLogicalCPUID_D(u8 node)
+ {
+ return mctGetLogicalCPUID(node);
+ }
+--
+1.7.9.5
+
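
Note on the S3 restore hunks above: they replace open-coded "reg + (0x100 * channel)" arithmetic with node/channel-aware accessors (read_config32_dct(), write_config32_dct(), write_amd_dct_index_register_dct()). The apparent motivation is that Family 15h no longer exposes the second DRAM controller as a flat +0x100 register alias the way Family 10h does, so DCT addressing has to be hidden behind helpers that pick the right mechanism per CPU family. The fragment below is only a minimal sketch of such helpers, not the coreboot implementation: every name prefixed with sketch_ is hypothetical, pci_read_config32()/pci_write_config32()/is_fam15h() are assumed primitives, the PCI_DEV() encoding is a placeholder, and the F1x10C DCT-select, DctAccessWrite (bit 30) and DctAccessDone (bit 31) details are assumptions drawn from the access pattern visible in the hunks.

    /*
     * Minimal sketch of node/channel-aware DCT register accessors,
     * in the spirit of the helpers the patch switches to.  Illustrative
     * only; all sketch_* names are hypothetical.
     */
    #include <stdint.h>

    typedef uint32_t pci_devfn_t;

    /* Placeholder devfn encoding for illustration only. */
    #define PCI_DEV(bus, dev, fn) \
            ((pci_devfn_t)(((bus) << 16) | ((dev) << 11) | ((fn) << 8)))

    extern uint32_t pci_read_config32(pci_devfn_t dev, uint32_t reg);
    extern void pci_write_config32(pci_devfn_t dev, uint32_t reg, uint32_t val);
    extern uint8_t is_fam15h(void);

    static uint32_t sketch_read_config32_dct(pci_devfn_t dev, uint8_t node,
                                             uint8_t dct, uint32_t reg)
    {
            if (is_fam15h()) {
                    /* Assumed: Family 15h selects the target DCT through
                     * F1x10C (DctCfgSel) instead of a per-DCT alias. */
                    pci_devfn_t fn1 = PCI_DEV(0, 0x18 + node, 1);
                    uint32_t dword = pci_read_config32(fn1, 0x10c);
                    dword &= ~0x1;
                    dword |= (dct & 0x1);
                    pci_write_config32(fn1, 0x10c, dword);
            } else {
                    /* Family 10h: DCT1 registers sit at a flat +0x100 offset. */
                    reg += dct * 0x100;
            }
            return pci_read_config32(dev, reg);
    }

    static void sketch_write_config32_dct(pci_devfn_t dev, uint8_t node,
                                          uint8_t dct, uint32_t reg, uint32_t val)
    {
            if (is_fam15h()) {
                    pci_devfn_t fn1 = PCI_DEV(0, 0x18 + node, 1);
                    uint32_t dword = pci_read_config32(fn1, 0x10c);
                    dword &= ~0x1;
                    dword |= (dct & 0x1);
                    pci_write_config32(fn1, 0x10c, dword);
            } else {
                    reg += dct * 0x100;
            }
            pci_write_config32(dev, reg, val);
    }

    /*
     * F2x98/F2x9C style index/data write: place the data in the data port
     * (index register + 4), write the index with DctAccessWrite set
     * (bit 30, assumed), then poll DctAccessDone (bit 31, assumed).
     */
    static void sketch_write_dct_index_register(pci_devfn_t dev, uint8_t node,
                                                uint8_t dct, uint32_t index_reg,
                                                uint32_t index, uint32_t data)
    {
            sketch_write_config32_dct(dev, node, dct, index_reg + 0x04, data);
            sketch_write_config32_dct(dev, node, dct, index_reg,
                                      index | (1 << 30));
            while (!(sketch_read_config32_dct(dev, node, dct, index_reg)
                     & (1 << 31)))
                    ;
    }

With helpers of this shape, a restore loop like the one in the hunks can iterate over node/channel pairs without caring which family it is running on, e.g. sketch_write_dct_index_register(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006, saved_value);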