From 05953f6713c3e93946786057e22e9962afa47ed9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 16 Oct 2012 22:52:03 -0300 Subject: [PATCH] Fix i82975x_edac (BZ#848149) This driver was soooo broken that I needed to rewrote the entire logic for memory identification and for memory error report. While here, enable EDAC_DEBUG, on debug kernels. Signed-off-by: Mauro Carvalho Chehab --- config-debug | 2 + config-nodebug | 2 + config-powerpc-generic | 1 - config-x86-generic | 1 - i82975x-edac-fix.patch | 855 +++++++++++++++++++++++++++++++++++++++++ kernel.spec | 10 +- 6 files changed, 866 insertions(+), 5 deletions(-) create mode 100644 i82975x-edac-fix.patch diff --git a/config-debug b/config-debug index db0aa6d4e..e1b3f9db2 100644 --- a/config-debug +++ b/config-debug @@ -112,3 +112,5 @@ CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=1024 CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y CONFIG_MAC80211_MESSAGE_TRACING=y + +CONFIG_EDAC_DEBUG=y diff --git a/config-nodebug b/config-nodebug index 5b4187680..c471b853e 100644 --- a/config-nodebug +++ b/config-nodebug @@ -112,3 +112,5 @@ CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=1024 CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y # CONFIG_MAC80211_MESSAGE_TRACING is not set + +# CONFIG_EDAC_DEBUG is not set diff --git a/config-powerpc-generic b/config-powerpc-generic index 58c3393a2..a263acc80 100644 --- a/config-powerpc-generic +++ b/config-powerpc-generic @@ -181,7 +181,6 @@ CONFIG_PMAC_APM_EMU=m CONFIG_HW_RANDOM_PASEMI=m CONFIG_EDAC=y -# CONFIG_EDAC_DEBUG is not set CONFIG_EDAC_MM_EDAC=m CONFIG_EDAC_PASEMI=m CONFIG_EDAC_AMD8131=m diff --git a/config-x86-generic b/config-x86-generic index de9582a01..09d63d401 100644 --- a/config-x86-generic +++ b/config-x86-generic @@ -151,7 +151,6 @@ CONFIG_DELL_RBU=m CONFIG_DCDBAS=m CONFIG_EDAC=y -# CONFIG_EDAC_DEBUG is not set CONFIG_EDAC_MM_EDAC=m CONFIG_EDAC_AMD76X=m CONFIG_EDAC_AMD8111=m diff --git a/i82975x-edac-fix.patch b/i82975x-edac-fix.patch new file mode 100644 index 000000000..f44ac7791 --- /dev/null +++ b/i82975x-edac-fix.patch @@ -0,0 +1,855 @@ +commit 9370a8d717720f6b17221490fea8d798396d9f2f +Author: Mauro Carvalho Chehab +Date: Mon Oct 15 21:49:35 2012 -0300 + + i82975x_edac: Use the edac standard debug macro + + Instead of declaring its own debug macro, that requires + to uncomment part of the code, use the edac standard macro + to add the debug code, and the edac debug level to print it, + just like any other EDAC driver. + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c +index a980204..f998d2c 100644 +--- a/drivers/edac/i82975x_edac.c ++++ b/drivers/edac/i82975x_edac.c +@@ -435,11 +435,9 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, + } + } + +-/* #define i82975x_DEBUG_IOMEM */ +- +-#ifdef i82975x_DEBUG_IOMEM +-static void i82975x_print_dram_timings(void __iomem *mch_window) ++static void i82975x_print_dram_config(void __iomem *mch_window, u32 mchbar, u32 *drc) + { ++#ifdef CONFIG_EDAC_DEBUG + /* + * The register meanings are from Intel specs; + * (shows 13-5-5-5 for 800-DDR2) +@@ -448,26 +446,63 @@ static void i82975x_print_dram_timings(void __iomem *mch_window) + */ + static const int caslats[4] = { 5, 4, 3, 6 }; + u32 dtreg[2]; ++ u8 c0drb[4]; ++ u8 c1drb[4]; ++ ++ if (!edac_debug_level) ++ return; ++ ++ i82975x_printk(KERN_INFO, "MCHBAR real = %0x, remapped = %p\n", ++ mchbar, mch_window); ++ ++ c0drb[0] = readb(mch_window + I82975X_DRB_CH0R0); ++ c0drb[1] = readb(mch_window + I82975X_DRB_CH0R1); ++ c0drb[2] = readb(mch_window + I82975X_DRB_CH0R2); ++ c0drb[3] = readb(mch_window + I82975X_DRB_CH0R3); ++ c1drb[0] = readb(mch_window + I82975X_DRB_CH1R0); ++ c1drb[1] = readb(mch_window + I82975X_DRB_CH1R1); ++ c1drb[2] = readb(mch_window + I82975X_DRB_CH1R2); ++ c1drb[3] = readb(mch_window + I82975X_DRB_CH1R3); ++ i82975x_printk(KERN_INFO, "DRBCH0R0 = 0x%02x\n", c0drb[0]); ++ i82975x_printk(KERN_INFO, "DRBCH0R1 = 0x%02x\n", c0drb[1]); ++ i82975x_printk(KERN_INFO, "DRBCH0R2 = 0x%02x\n", c0drb[2]); ++ i82975x_printk(KERN_INFO, "DRBCH0R3 = 0x%02x\n", c0drb[3]); ++ i82975x_printk(KERN_INFO, "DRBCH1R0 = 0x%02x\n", c1drb[0]); ++ i82975x_printk(KERN_INFO, "DRBCH1R1 = 0x%02x\n", c1drb[1]); ++ i82975x_printk(KERN_INFO, "DRBCH1R2 = 0x%02x\n", c1drb[2]); ++ i82975x_printk(KERN_INFO, "DRBCH1R3 = 0x%02x\n", c1drb[3]); ++ ++ i82975x_printk(KERN_INFO, "DRC_CH0 = %0x, %s\n", drc[0], ++ ((drc[0] >> 21) & 3) == 1 ? ++ "ECC enabled" : "ECC disabled"); ++ i82975x_printk(KERN_INFO, "DRC_CH1 = %0x, %s\n", drc[1], ++ ((drc[1] >> 21) & 3) == 1 ? ++ "ECC enabled" : "ECC disabled"); ++ ++ i82975x_printk(KERN_INFO, "C0 BNKARC = %0x\n", ++ readw(mch_window + I82975X_C0BNKARC)); ++ i82975x_printk(KERN_INFO, "C1 BNKARC = %0x\n", ++ readw(mch_window + I82975X_C1BNKARC)); + + dtreg[0] = readl(mch_window + 0x114); + dtreg[1] = readl(mch_window + 0x194); +- i82975x_printk(KERN_INFO, "DRAM Timings : Ch0 Ch1\n" ++ i82975x_printk(KERN_INFO, ++ "DRAM Timings : Ch0 Ch1\n" + " RAS Active Min = %d %d\n" + " CAS latency = %d %d\n" + " RAS to CAS = %d %d\n" + " RAS precharge = %d %d\n", + (dtreg[0] >> 19 ) & 0x0f, +- (dtreg[1] >> 19) & 0x0f, ++ (dtreg[1] >> 19) & 0x0f, + caslats[(dtreg[0] >> 8) & 0x03], +- caslats[(dtreg[1] >> 8) & 0x03], ++ caslats[(dtreg[1] >> 8) & 0x03], + ((dtreg[0] >> 4) & 0x07) + 2, +- ((dtreg[1] >> 4) & 0x07) + 2, ++ ((dtreg[1] >> 4) & 0x07) + 2, + (dtreg[0] & 0x07) + 2, +- (dtreg[1] & 0x07) + 2 ++ (dtreg[1] & 0x07) + 2 + ); +- +-} + #endif ++} + + static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) + { +@@ -480,10 +515,6 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) + u32 drc[2]; + struct i82975x_error_info discard; + int chans; +-#ifdef i82975x_DEBUG_IOMEM +- u8 c0drb[4]; +- u8 c1drb[4]; +-#endif + + edac_dbg(0, "\n"); + +@@ -495,45 +526,11 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) + mchbar &= 0xffffc000; /* bits 31:14 used for 16K window */ + mch_window = ioremap_nocache(mchbar, 0x1000); + +-#ifdef i82975x_DEBUG_IOMEM +- i82975x_printk(KERN_INFO, "MCHBAR real = %0x, remapped = %p\n", +- mchbar, mch_window); +- +- c0drb[0] = readb(mch_window + I82975X_DRB_CH0R0); +- c0drb[1] = readb(mch_window + I82975X_DRB_CH0R1); +- c0drb[2] = readb(mch_window + I82975X_DRB_CH0R2); +- c0drb[3] = readb(mch_window + I82975X_DRB_CH0R3); +- c1drb[0] = readb(mch_window + I82975X_DRB_CH1R0); +- c1drb[1] = readb(mch_window + I82975X_DRB_CH1R1); +- c1drb[2] = readb(mch_window + I82975X_DRB_CH1R2); +- c1drb[3] = readb(mch_window + I82975X_DRB_CH1R3); +- i82975x_printk(KERN_INFO, "DRBCH0R0 = 0x%02x\n", c0drb[0]); +- i82975x_printk(KERN_INFO, "DRBCH0R1 = 0x%02x\n", c0drb[1]); +- i82975x_printk(KERN_INFO, "DRBCH0R2 = 0x%02x\n", c0drb[2]); +- i82975x_printk(KERN_INFO, "DRBCH0R3 = 0x%02x\n", c0drb[3]); +- i82975x_printk(KERN_INFO, "DRBCH1R0 = 0x%02x\n", c1drb[0]); +- i82975x_printk(KERN_INFO, "DRBCH1R1 = 0x%02x\n", c1drb[1]); +- i82975x_printk(KERN_INFO, "DRBCH1R2 = 0x%02x\n", c1drb[2]); +- i82975x_printk(KERN_INFO, "DRBCH1R3 = 0x%02x\n", c1drb[3]); +-#endif +- + drc[0] = readl(mch_window + I82975X_DRC_CH0M0); + drc[1] = readl(mch_window + I82975X_DRC_CH1M0); +-#ifdef i82975x_DEBUG_IOMEM +- i82975x_printk(KERN_INFO, "DRC_CH0 = %0x, %s\n", drc[0], +- ((drc[0] >> 21) & 3) == 1 ? +- "ECC enabled" : "ECC disabled"); +- i82975x_printk(KERN_INFO, "DRC_CH1 = %0x, %s\n", drc[1], +- ((drc[1] >> 21) & 3) == 1 ? +- "ECC enabled" : "ECC disabled"); + +- i82975x_printk(KERN_INFO, "C0 BNKARC = %0x\n", +- readw(mch_window + I82975X_C0BNKARC)); +- i82975x_printk(KERN_INFO, "C1 BNKARC = %0x\n", +- readw(mch_window + I82975X_C1BNKARC)); +- i82975x_print_dram_timings(mch_window); +- goto fail1; +-#endif ++ i82975x_print_dram_config(mch_window, mchbar, drc); ++ + if (!(((drc[0] >> 21) & 3) == 1 || ((drc[1] >> 21) & 3) == 1)) { + i82975x_printk(KERN_INFO, "ECC disabled on both channels.\n"); + goto fail1; + +commit 8992ed2f4295eab137e1713fa16be5546a759373 +Author: Mauro Carvalho Chehab +Date: Mon Oct 15 21:48:48 2012 -0300 + + i82975x_edac: Fix dimm label initialization + + The driver has only 4 hardcoded labels, but allows much more memory. + Fix it by removing the hardcoded logic, using snprintf() instead. + + [ 19.833972] general protection fault: 0000 [#1] SMP + [ 19.837733] Modules linked in: i82975x_edac(+) edac_core firewire_ohci firewire_core crc_itu_t nouveau mxm_wmi wmi video i2c_algo_bit drm_kms_helper ttm drm i2c_core + [ 19.837733] CPU 0 + [ 19.837733] Pid: 390, comm: udevd Not tainted 3.6.1-1.fc17.x86_64.debug #1 Dell Inc. Precision WorkStation 390 /0MY510 + [ 19.837733] RIP: 0010:[] [] strncpy+0x18/0x30 + [ 19.837733] RSP: 0018:ffff880078535b68 EFLAGS: 00010202 + [ 19.837733] RAX: ffff880069fa9708 RBX: ffff880078588000 RCX: ffff880069fa9708 + [ 19.837733] RDX: 000000000000001f RSI: 5f706f5f63616465 RDI: ffff880069fa9708 + [ 19.837733] RBP: ffff880078535b68 R08: ffff880069fa9727 R09: 000000000000fffe + [ 19.837733] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000003 + [ 19.837733] R13: 0000000000000000 R14: ffff880069fa9290 R15: ffff880079624a80 + [ 19.837733] FS: 00007f3de01ee840(0000) GS:ffff88007c400000(0000) knlGS:0000000000000000 + [ 19.837733] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [ 19.837733] CR2: 00007f3de00b9000 CR3: 0000000078dbc000 CR4: 00000000000007f0 + [ 19.837733] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [ 19.837733] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 + [ 19.837733] Process udevd (pid: 390, threadinfo ffff880078534000, task ffff880079642450) + [ 19.837733] Stack: + [ 19.837733] ffff880078535c18 ffffffffa017c6b8 00040000816d627f ffff880079624a88 + [ 19.837733] ffffc90004cd6000 ffff880079624520 ffff88007ac21148 0000000000000000 + [ 19.837733] 0000000000000000 0004000000000000 feda000078535bc8 ffffffff810d696d + [ 19.837733] Call Trace: + [ 19.837733] [] i82975x_init_one+0x2e6/0x3e6 [i82975x_edac] + ... + + Fix bug reported at: + https://bugzilla.redhat.com/show_bug.cgi?id=848149 + And, very likely: + https://bbs.archlinux.org/viewtopic.php?id=148033 + https://bugzilla.kernel.org/show_bug.cgi?id=47171 + + Cc: Alan Cox + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c +index 069e26c..a980204 100644 +--- a/drivers/edac/i82975x_edac.c ++++ b/drivers/edac/i82975x_edac.c +@@ -370,10 +370,6 @@ static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank) + static void i82975x_init_csrows(struct mem_ctl_info *mci, + struct pci_dev *pdev, void __iomem *mch_window) + { +- static const char *labels[4] = { +- "DIMM A1", "DIMM A2", +- "DIMM B1", "DIMM B2" +- }; + struct csrow_info *csrow; + unsigned long last_cumul_size; + u8 value; +@@ -423,9 +419,10 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, + dimm = mci->csrows[index]->channels[chan]->dimm; + + dimm->nr_pages = nr_pages / csrow->nr_channels; +- strncpy(csrow->channels[chan]->dimm->label, +- labels[(index >> 1) + (chan * 2)], +- EDAC_MC_LABEL_LEN); ++ ++ snprintf(csrow->channels[chan]->dimm->label, EDAC_MC_LABEL_LEN, "DIMM %c%d", ++ (chan == 0) ? 'A' : 'B', ++ index); + dimm->grain = 1 << 7; /* 128Byte cache-line resolution */ + dimm->dtype = i82975x_dram_type(mch_window, index); + dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */ +commit 6e7636c972951ba0c6c640758b1dcc7667cb2415 +Author: Mauro Carvalho Chehab +Date: Tue Oct 16 15:30:23 2012 -0300 + + i82975x_edac: rewrite the entire fill/report logic + + There are so many bugs at the fill/error report logic that + the code ended by being re-written. + + Issues solved: + + - DIMM labels were "randomly" filled: they won't + match the memory layout, due to a series of bugs on it; + + - The memory controller supports 3 different modes: + single, dual interleaved and dual async. The logic there were + written considering the dual interleaved one (yet, some + single mode support was there); + + - The boundary limit to decide on each channel the error happens, + at dual interleaved mode, is given by bit 6 of the error address, + and not bit 1. The practical effect is that Corrected errors + will have a 50% of chance of pointing to the right DIMM. Only + the DIMM pair logic were OK; + + - The asymetric mode weren't properly supported. The driver would + handle an asymetric mode as 'single mode', with doesn't actually + match it, creating some weird real/virtual DIMM mappings. + + - Some other random bugs got fixed. + + Tested on a Dell Precision N390, on dual interleaved mode. + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c +index f998d2c..082e91e 100644 +--- a/drivers/edac/i82975x_edac.c ++++ b/drivers/edac/i82975x_edac.c +@@ -6,7 +6,17 @@ + * GNU General Public License. + * + * Written by Arvind R. +- * Copied from i82875p_edac.c source: ++ * Copied from i82875p_edac.c source, ++ * ++ * (c) 2012 Mauro Carvalho Chehab ++ * Driver re-written in order to fix lots of issues on it: ++ * - Fix Single Mode; ++ * - Add support for Asymetrical mode ++ * - Fix several issues with Interleaved mode ++ * - Fix memory label logic ++ * ++ * Intel datasheet: Intel(R) 975X Express Chipset Datasheet ++ * http://www.intel.com/assets/pdf/datasheet/310158.pdf + */ + + #include +@@ -29,8 +39,8 @@ + #define PCI_DEVICE_ID_INTEL_82975_0 0x277c + #endif /* PCI_DEVICE_ID_INTEL_82975_0 */ + +-#define I82975X_NR_DIMMS 8 +-#define I82975X_NR_CSROWS(nr_chans) (I82975X_NR_DIMMS / (nr_chans)) ++#define DIMMS_PER_CHANNEL 4 ++#define NUM_CHANNELS 2 + + /* Intel 82975X register addresses - device 0 function 0 - DRAM Controller */ + #define I82975X_EAP 0x58 /* Dram Error Address Pointer (32b) +@@ -112,7 +122,7 @@ NOTE: Only ONE of the three must be enabled + /* NOTE: Following addresses have to indexed using MCHBAR offset (44h, 32b) */ + /* Intel 82975x memory mapped register space */ + +-#define I82975X_DRB_SHIFT 25 /* fixed 32MiB grain */ ++#define I82975X_DRB_SHIFT 25 /* fixed 2^25 = 32 MiB grain */ + + #define I82975X_DRB 0x100 /* DRAM Row Boundary (8b x 8) + * +@@ -152,6 +162,10 @@ NOTE: Only ONE of the three must be enabled + #define I82975X_DRA_CH1R01 0x188 + #define I82975X_DRA_CH1R23 0x189 + ++/* Channels 0/1 DRAM Timing Register 1 */ ++#define I82975X_C0DRT1 0x114 ++#define I82975X_C1DRT1 0x194 ++ + + #define I82975X_BNKARC 0x10e /* Type of device in each rank - Bank Arch (16b) + * +@@ -206,8 +220,16 @@ enum i82975x_chips { + I82975X = 0, + }; + ++struct mem_range { ++ u32 start, end; ++}; ++ + struct i82975x_pvt { +- void __iomem *mch_window; ++ void __iomem *mch_window; ++ int num_channels; ++ bool is_symetric; ++ u8 drb[DIMMS_PER_CHANNEL][NUM_CHANNELS]; ++ struct mem_range page[DIMMS_PER_CHANNEL][NUM_CHANNELS]; + }; + + struct i82975x_dev_info { +@@ -278,8 +300,10 @@ static void i82975x_get_error_info(struct mem_ctl_info *mci, + static int i82975x_process_error_info(struct mem_ctl_info *mci, + struct i82975x_error_info *info, int handle_errors) + { +- int row, chan; +- unsigned long offst, page; ++ struct i82975x_pvt *pvt = mci->pvt_info; ++ struct mem_range *range; ++ unsigned int row, chan, grain; ++ unsigned long offst, page; + + if (!(info->errsts2 & 0x0003)) + return 0; +@@ -293,36 +317,70 @@ static int i82975x_process_error_info(struct mem_ctl_info *mci, + info->errsts = info->errsts2; + } + ++ /* Calculate page and offset of the error */ ++ + page = (unsigned long) info->eap; + page >>= 1; ++ + if (info->xeap & 1) + page |= 0x80000000; + page >>= (PAGE_SHIFT - 1); +- row = edac_mc_find_csrow_by_page(mci, page); +- +- if (row == -1) { +- i82975x_mc_printk(mci, KERN_ERR, "error processing EAP:\n" +- "\tXEAP=%u\n" +- "\t EAP=0x%08x\n" +- "\tPAGE=0x%08x\n", +- (info->xeap & 1) ? 1 : 0, info->eap, (unsigned int) page); +- return 0; ++ ++ if (pvt->is_symetric) ++ grain = 1 << 7; ++ else ++ grain = 1 << 6; ++ ++ offst = info->eap & ((1 << PAGE_SHIFT) - (1 << grain)); ++ ++ /* ++ * Search for the DIMM chip that match the error page. ++ * ++ * On Symmetric mode, this will always return channel = 0, as ++ * both channel A and B ranges are identical. ++ * A latter logic will determinte the channel on symetric mode ++ * ++ * On asymetric mode or single mode, there will be just one match, ++ * that will point to the csrow with the error. ++ */ ++ for (chan = 0; chan < pvt->num_channels; chan++) { ++ for (row = 0; row < DIMMS_PER_CHANNEL; row++) { ++ range = &pvt->page[row][chan]; ++ ++ if (page >= range->start && page <= range->end) ++ goto found; ++ } + } +- chan = (mci->csrows[row]->nr_channels == 1) ? 0 : info->eap & 1; +- offst = info->eap +- & ((1 << PAGE_SHIFT) - +- (1 << mci->csrows[row]->channels[chan]->dimm->grain)); ++ chan = -1; ++ row = -1; + +- if (info->errsts & 0x0002) ++found: ++ if (info->errsts & 0x0002) { ++ /* ++ * On uncorrected error, ECC doesn't allow do determine the ++ * channel where the error has occurred. ++ */ + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + page, offst, 0, + row, -1, -1, + "i82975x UE", ""); +- else +- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, +- page, offst, info->derrsyn, +- row, chan ? chan : 0, -1, +- "i82975x CE", ""); ++ return 1; ++ } ++ ++ if (pvt->is_symetric && row >= 0) { ++ /* ++ * On Symetric mode, the memory switch happens after each ++ * cache line (64 byte boundary). Channel 0 goes first. ++ */ ++ if (info->eap & (1 << 6)) ++ chan = 1; ++ else ++ chan = 0; ++ } ++ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, ++ page, offst, info->derrsyn, ++ row, chan, -1, ++ "i82975x CE", ""); + + return 1; + } +@@ -331,111 +389,143 @@ static void i82975x_check(struct mem_ctl_info *mci) + { + struct i82975x_error_info info; + +- edac_dbg(1, "MC%d\n", mci->mc_idx); ++ edac_dbg(4, "MC%d\n", mci->mc_idx); + i82975x_get_error_info(mci, &info); + i82975x_process_error_info(mci, &info, 1); + } + +-/* Return 1 if dual channel mode is active. Else return 0. */ +-static int dual_channel_active(void __iomem *mch_window) ++/** ++ * detect_memory_style - Detect on what mode the memory controller is programmed ++ * ++ * @pvt: pointer to the private structure ++ * ++ * This function detects how many channels are in use, and if the memory ++ * controller is in symetric (interleaved) or asymetric mode. There's no ++ * need to distinguish between asymetric and single mode, as the routines ++ * that fill the csrows data and handle error are written in order to handle ++ * both at the same way. ++ */ ++static void detect_memory_style(struct i82975x_pvt *pvt) + { +- /* +- * We treat interleaved-symmetric configuration as dual-channel - EAP's +- * bit-0 giving the channel of the error location. +- * +- * All other configurations are treated as single channel - the EAP's +- * bit-0 will resolve ok in symmetric area of mixed +- * (symmetric/asymmetric) configurations +- */ +- u8 drb[4][2]; + int row; +- int dualch; ++ bool has_chan_a = false; ++ bool has_chan_b = false; ++ ++ pvt->is_symetric = true; ++ pvt->num_channels = 0; ++ ++ for (row = 0; row < DIMMS_PER_CHANNEL; row++) { ++ pvt->drb[row][0] = readb(pvt->mch_window + I82975X_DRB + row); ++ pvt->drb[row][1] = readb(pvt->mch_window + I82975X_DRB + row + 0x80); ++ ++ /* On symetric mode, both channels have the same boundaries */ ++ if (pvt->drb[row][0] != pvt->drb[row][1]) ++ pvt->is_symetric = false; + +- for (dualch = 1, row = 0; dualch && (row < 4); row++) { +- drb[row][0] = readb(mch_window + I82975X_DRB + row); +- drb[row][1] = readb(mch_window + I82975X_DRB + row + 0x80); +- dualch = dualch && (drb[row][0] == drb[row][1]); ++ if (pvt->drb[row][0]) ++ has_chan_a = true; ++ if (pvt->drb[row][1]) ++ has_chan_b = true; + } +- return dualch; +-} + +-static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank) +-{ +- /* +- * ECC is possible on i92975x ONLY with DEV_X8 +- */ +- return DEV_X8; ++ if (has_chan_a) ++ pvt->num_channels++; ++ ++ if (has_chan_b) ++ pvt->num_channels++; + } + + static void i82975x_init_csrows(struct mem_ctl_info *mci, +- struct pci_dev *pdev, void __iomem *mch_window) ++ struct i82975x_pvt *pvt, ++ struct pci_dev *pdev) + { +- struct csrow_info *csrow; +- unsigned long last_cumul_size; +- u8 value; +- u32 cumul_size, nr_pages; +- int index, chan; +- struct dimm_info *dimm; +- enum dev_type dtype; +- +- last_cumul_size = 0; ++ struct dimm_info *dimm; ++ struct mem_range *range; ++ u8 boundary; ++ u32 initial_page = 0, last_page; ++ int row, chan; + + /* +- * 82875 comment: +- * The dram row boundary (DRB) reg values are boundary address +- * for each DRAM row with a granularity of 32 or 64MB (single/dual +- * channel operation). DRB regs are cumulative; therefore DRB7 will +- * contain the total memory contained in all rows. +- * ++ * This chipset provides 3 address modes: ++ * Single channel - either Channel A or channel B is filled ++ * Dual channel, interleaved: Memory is organized in pairs, ++ * where channel A gets the lower address for each pair ++ * Dual channel, asymmetric: Channel A memory goes first. ++ * In order to cover all modes, we need to start describing ++ * memories considering the dual channel, asymmetric one. + */ + +- for (index = 0; index < mci->nr_csrows; index++) { +- csrow = mci->csrows[index]; +- +- value = readb(mch_window + I82975X_DRB + index + +- ((index >= 4) ? 0x80 : 0)); +- cumul_size = value; +- cumul_size <<= (I82975X_DRB_SHIFT - PAGE_SHIFT); +- /* +- * Adjust cumul_size w.r.t number of channels +- * +- */ +- if (csrow->nr_channels > 1) +- cumul_size <<= 1; +- edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size); +- +- nr_pages = cumul_size - last_cumul_size; +- if (!nr_pages) +- continue; +- ++ for (chan = 0; chan < pvt->num_channels; chan++) { + /* +- * Initialise dram labels +- * index values: +- * [0-7] for single-channel; i.e. csrow->nr_channels = 1 +- * [0-3] for dual-channel; i.e. csrow->nr_channels = 2 ++ * On symetric mode, both channels start from address 0 + */ +- dtype = i82975x_dram_type(mch_window, index); +- for (chan = 0; chan < csrow->nr_channels; chan++) { +- dimm = mci->csrows[index]->channels[chan]->dimm; +- +- dimm->nr_pages = nr_pages / csrow->nr_channels; +- +- snprintf(csrow->channels[chan]->dimm->label, EDAC_MC_LABEL_LEN, "DIMM %c%d", +- (chan == 0) ? 'A' : 'B', +- index); +- dimm->grain = 1 << 7; /* 128Byte cache-line resolution */ +- dimm->dtype = i82975x_dram_type(mch_window, index); ++ if (pvt->is_symetric) ++ initial_page = 0; ++ ++ for (row = 0; row < DIMMS_PER_CHANNEL; row++) { ++ boundary = pvt->drb[row][chan]; ++ dimm = mci->csrows[row]->channels[chan]->dimm; ++ ++ last_page = boundary << (I82975X_DRB_SHIFT - PAGE_SHIFT); ++ dimm->nr_pages = last_page - initial_page; ++ if (!dimm->nr_pages) ++ continue; ++ ++ range = &pvt->page[row][chan]; ++ range->start = initial_page; ++ range->end = range->start + dimm->nr_pages - 1; ++ ++ /* ++ * Grain is one cache-line: ++ * On dual symetric mode, it is 128 Bytes; ++ * On single mode or asymetric, it is 64 bytes. ++ */ ++ if (pvt->is_symetric) { ++ dimm->grain = 1 << 7; ++ ++ /* ++ * In dual interleaved mode, the addresses ++ * need to be multiplied by 2, as both ++ * channels are interlaced, and the boundary ++ * limit there actually match each DIMM size ++ */ ++ range->start <<= 1; ++ range->end <<= 1; ++ } else { ++ dimm->grain = 1 << 6; ++ } ++ ++ snprintf(dimm->label, ++ EDAC_MC_LABEL_LEN, "DIMM %c%d", ++ (chan == 0) ? 'A' : 'B', row); + dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */ + dimm->edac_mode = EDAC_SECDED; /* only supported */ +- } + +- csrow->first_page = last_cumul_size; +- csrow->last_page = cumul_size - 1; +- last_cumul_size = cumul_size; ++ /* ++ * This chipset supports both x8 and x16 memories, ++ * but datasheet doesn't describe how to distinguish ++ * between them. ++ * ++ * Also, the "Rank" comment at initial_page 17 says that ++ * ECC is only available with x8 memories. As this ++ * driver doesn't even initialize without ECC, better ++ * to assume that everything is x8. This is not ++ * actually true, on a mixed ECC/non-ECC scenario. ++ */ ++ dimm->dtype = DEV_X8; ++ ++ edac_dbg(1, ++ "%s: from page 0x%08x to 0x%08x (size: 0x%08x pages)\n", ++ dimm->label, ++ range->start, range->end, ++ dimm->nr_pages); ++ initial_page = last_page; ++ } + } + } + +-static void i82975x_print_dram_config(void __iomem *mch_window, u32 mchbar, u32 *drc) ++static void i82975x_print_dram_config(struct i82975x_pvt *pvt, ++ u32 mchbar, u32 *drc) + { + #ifdef CONFIG_EDAC_DEBUG + /* +@@ -444,63 +534,57 @@ static void i82975x_print_dram_config(void __iomem *mch_window, u32 mchbar, u32 + * Asus P5W Bios reports 15-5-4-4 + * What's your religion? + */ +- static const int caslats[4] = { 5, 4, 3, 6 }; +- u32 dtreg[2]; +- u8 c0drb[4]; +- u8 c1drb[4]; ++ static const int caslats[4] = { 5, 4, 3, 6 }; ++ u32 dtreg[2]; ++ int row; + ++ /* Show memory config if debug level is 1 or upper */ + if (!edac_debug_level) + return; + + i82975x_printk(KERN_INFO, "MCHBAR real = %0x, remapped = %p\n", +- mchbar, mch_window); +- +- c0drb[0] = readb(mch_window + I82975X_DRB_CH0R0); +- c0drb[1] = readb(mch_window + I82975X_DRB_CH0R1); +- c0drb[2] = readb(mch_window + I82975X_DRB_CH0R2); +- c0drb[3] = readb(mch_window + I82975X_DRB_CH0R3); +- c1drb[0] = readb(mch_window + I82975X_DRB_CH1R0); +- c1drb[1] = readb(mch_window + I82975X_DRB_CH1R1); +- c1drb[2] = readb(mch_window + I82975X_DRB_CH1R2); +- c1drb[3] = readb(mch_window + I82975X_DRB_CH1R3); +- i82975x_printk(KERN_INFO, "DRBCH0R0 = 0x%02x\n", c0drb[0]); +- i82975x_printk(KERN_INFO, "DRBCH0R1 = 0x%02x\n", c0drb[1]); +- i82975x_printk(KERN_INFO, "DRBCH0R2 = 0x%02x\n", c0drb[2]); +- i82975x_printk(KERN_INFO, "DRBCH0R3 = 0x%02x\n", c0drb[3]); +- i82975x_printk(KERN_INFO, "DRBCH1R0 = 0x%02x\n", c1drb[0]); +- i82975x_printk(KERN_INFO, "DRBCH1R1 = 0x%02x\n", c1drb[1]); +- i82975x_printk(KERN_INFO, "DRBCH1R2 = 0x%02x\n", c1drb[2]); +- i82975x_printk(KERN_INFO, "DRBCH1R3 = 0x%02x\n", c1drb[3]); +- +- i82975x_printk(KERN_INFO, "DRC_CH0 = %0x, %s\n", drc[0], ++ mchbar, pvt->mch_window); ++ ++ for (row = 0; row < DIMMS_PER_CHANNEL; row++) { ++ if (row) ++ /* Only show if at least one bank is filled */ ++ if ((pvt->drb[row][0] == pvt->drb[row-1][0]) && ++ (pvt->drb[row][1] == pvt->drb[row-1][1])) ++ continue; ++ ++ i82975x_printk(KERN_INFO, ++ "DRAM%i Rank Boundary Address: Channel A: 0x%08x; Channel B: 0x%08x\n", ++ row, ++ pvt->drb[row][0], ++ pvt->drb[row][1]); ++ } ++ ++ i82975x_printk(KERN_INFO, "DRAM Controller mode Channel A: = 0x%08x (%s); Channel B: 0x%08x (%s)\n", ++ drc[0], + ((drc[0] >> 21) & 3) == 1 ? +- "ECC enabled" : "ECC disabled"); +- i82975x_printk(KERN_INFO, "DRC_CH1 = %0x, %s\n", drc[1], ++ "ECC enabled" : "ECC disabled", ++ drc[1], + ((drc[1] >> 21) & 3) == 1 ? + "ECC enabled" : "ECC disabled"); + +- i82975x_printk(KERN_INFO, "C0 BNKARC = %0x\n", +- readw(mch_window + I82975X_C0BNKARC)); +- i82975x_printk(KERN_INFO, "C1 BNKARC = %0x\n", +- readw(mch_window + I82975X_C1BNKARC)); +- +- dtreg[0] = readl(mch_window + 0x114); +- dtreg[1] = readl(mch_window + 0x194); +- i82975x_printk(KERN_INFO, +- "DRAM Timings : Ch0 Ch1\n" +- " RAS Active Min = %d %d\n" +- " CAS latency = %d %d\n" +- " RAS to CAS = %d %d\n" +- " RAS precharge = %d %d\n", +- (dtreg[0] >> 19 ) & 0x0f, +- (dtreg[1] >> 19) & 0x0f, +- caslats[(dtreg[0] >> 8) & 0x03], +- caslats[(dtreg[1] >> 8) & 0x03], +- ((dtreg[0] >> 4) & 0x07) + 2, +- ((dtreg[1] >> 4) & 0x07) + 2, ++ i82975x_printk(KERN_INFO, "Bank Architecture Channel A: 0x%08x, Channel B: 0x%08x\n", ++ readw(pvt->mch_window + I82975X_C0BNKARC), ++ readw(pvt->mch_window + I82975X_C1BNKARC)); ++ ++ dtreg[0] = readl(pvt->mch_window + I82975X_C0DRT1); ++ dtreg[1] = readl(pvt->mch_window + I82975X_C1DRT1); ++ i82975x_printk(KERN_INFO, "DRAM Timings : ChA ChB\n"); ++ i82975x_printk(KERN_INFO, " RAS Active Min = %2d %2d\n", ++ (dtreg[0] >> 19 ) & 0x0f,(dtreg[1] >> 19) & 0x0f); ++ i82975x_printk(KERN_INFO, " CAS latency = %2d %2d\n", ++ caslats[(dtreg[0] >> 8) & 0x03], ++ caslats[(dtreg[1] >> 8) & 0x03]); ++ i82975x_printk(KERN_INFO, " RAS to CAS = %2d %2d\n", ++ ((dtreg[0] >> 4) & 0x07) + 2, ++ ((dtreg[1] >> 4) & 0x07) + 2); ++ i82975x_printk(KERN_INFO, " RAS precharge = %2d %2d\n", + (dtreg[0] & 0x07) + 2, +- (dtreg[1] & 0x07) + 2 +- ); ++ (dtreg[1] & 0x07) + 2); + #endif + } + +@@ -509,12 +593,10 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) + int rc = -ENODEV; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; +- struct i82975x_pvt *pvt; +- void __iomem *mch_window; ++ struct i82975x_pvt tmp_pvt, *pvt; + u32 mchbar; + u32 drc[2]; + struct i82975x_error_info discard; +- int chans; + + edac_dbg(0, "\n"); + +@@ -524,26 +606,35 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) + goto fail0; + } + mchbar &= 0xffffc000; /* bits 31:14 used for 16K window */ +- mch_window = ioremap_nocache(mchbar, 0x1000); ++ tmp_pvt.mch_window = ioremap_nocache(mchbar, 0x1000); ++ if (!tmp_pvt.mch_window) { ++ i82975x_printk(KERN_ERR, "Couldn't map MCHBAR registers.\n"); ++ rc = -ENOMEM; ++ goto fail0; ++ } + +- drc[0] = readl(mch_window + I82975X_DRC_CH0M0); +- drc[1] = readl(mch_window + I82975X_DRC_CH1M0); ++ drc[0] = readl(tmp_pvt.mch_window + I82975X_DRC_CH0M0); ++ drc[1] = readl(tmp_pvt.mch_window + I82975X_DRC_CH1M0); ++ ++ detect_memory_style(&tmp_pvt); ++ if (!tmp_pvt.num_channels) { ++ edac_dbg(3, "No memories installed? This shouldn't be running!\n"); ++ goto fail0; ++ } + +- i82975x_print_dram_config(mch_window, mchbar, drc); ++ i82975x_print_dram_config(&tmp_pvt, mchbar, drc); + + if (!(((drc[0] >> 21) & 3) == 1 || ((drc[1] >> 21) & 3) == 1)) { + i82975x_printk(KERN_INFO, "ECC disabled on both channels.\n"); + goto fail1; + } + +- chans = dual_channel_active(mch_window) + 1; +- + /* assuming only one controller, index thus is 0 */ + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; +- layers[0].size = I82975X_NR_DIMMS; ++ layers[0].size = DIMMS_PER_CHANNEL; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; +- layers[1].size = I82975X_NR_CSROWS(chans); ++ layers[1].size = tmp_pvt.num_channels; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); + if (!mci) { +@@ -562,10 +653,12 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) + mci->dev_name = pci_name(pdev); + mci->edac_check = i82975x_check; + mci->ctl_page_to_phys = NULL; ++ + edac_dbg(3, "init pvt\n"); + pvt = (struct i82975x_pvt *) mci->pvt_info; +- pvt->mch_window = mch_window; +- i82975x_init_csrows(mci, pdev, mch_window); ++ *pvt = tmp_pvt; ++ ++ i82975x_init_csrows(mci, pvt, pdev); + mci->scrub_mode = SCRUB_HW_SRC; + i82975x_get_error_info(mci, &discard); /* clear counters */ + +@@ -583,7 +676,7 @@ fail2: + edac_mc_free(mci); + + fail1: +- iounmap(mch_window); ++ iounmap(tmp_pvt.mch_window); + fail0: + return rc; + } diff --git a/kernel.spec b/kernel.spec index 02506b39f..dd3aaeaf8 100644 --- a/kernel.spec +++ b/kernel.spec @@ -62,7 +62,7 @@ Summary: The Linux kernel # For non-released -rc kernels, this will be appended after the rcX and # gitX tags, so a 3 here would become part of release "0.rcX.gitX.3" # -%global baserelease 1 +%global baserelease 2 %global fedora_build %{baserelease} # base_sublevel is the kernel version we're starting with and patching @@ -725,6 +725,8 @@ Patch14000: hibernate-freeze-filesystems.patch Patch14010: lis3-improve-handling-of-null-rate.patch +Patch19001: i82975x-edac-fix.patch + # ARM # OMAP @@ -1438,6 +1440,8 @@ ApplyPatch efi-dont-map-boot-services-on-32bit.patch ApplyPatch lis3-improve-handling-of-null-rate.patch +ApplyPatch i82975x-edac-fix.patch + ApplyPatch power-x86-destdir.patch #rhbz 754518 @@ -2307,8 +2311,8 @@ fi # ||----w | # || || %changelog -* Mon Oct 15 2012 Justin M. Forbes -- re-enable modsign and secure boot +* Tue Oct 16 2012 Mauro Carvalho Chehab +- Fix i82975x_edac OOPS * Mon Oct 15 2012 Justin M. Forbes - 3.7.0-0.rc1.git0.1 - Linux 3.7-rc1