1 /* 2 * Defines, structures, APIs for edac_device 3 * 4 * (C) 2007 Linux Networx (http://lnxi.com) 5 * This file may be distributed under the terms of the 6 * GNU General Public License. 7 * 8 * Written by Thayne Harbaugh 9 * Based on work by Dan Hollis <goemon at anime dot net> and others. 10 * http://www.anime.net/~goemon/linux-ecc/ 11 * 12 * NMI handling support added by 13 * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com> 14 * 15 * Refactored for multi-source files: 16 * Doug Thompson <norsk5@xmission.com> 17 * 18 * Please look at Documentation/driver-api/edac.rst for more info about 19 * EDAC core structs and functions. 20 */ 21 22 #ifndef _EDAC_DEVICE_H_ 23 #define _EDAC_DEVICE_H_ 24 25 #include <linux/device.h> 26 #include <linux/edac.h> 27 #include <linux/kobject.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/sysfs.h> 31 #include <linux/workqueue.h> 32 33 34 /* 35 * The following are the structures to provide for a generic 36 * or abstract 'edac_device'. This set of structures and the 37 * code that implements the APIs for the same, provide for 38 * registering EDAC type devices which are NOT standard memory. 39 * 40 * CPU caches (L1 and L2) 41 * DMA engines 42 * Core CPU switches 43 * Fabric switch units 44 * PCIe interface controllers 45 * other EDAC/ECC type devices that can be monitored for 46 * errors, etc. 47 * 48 * It allows for a 2 level set of hierarchy. For example: 49 * 50 * cache could be composed of L1, L2 and L3 levels of cache. 51 * Each CPU core would have its own L1 cache, while sharing 52 * L2 and maybe L3 caches. 53 * 54 * View them arranged, via the sysfs presentation: 55 * /sys/devices/system/edac/.. 56 * 57 * mc/ <existing memory device directory> 58 * cpu/cpu0/.. <L1 and L2 block directory> 59 * /L1-cache/ce_count 60 * /ue_count 61 * /L2-cache/ce_count 62 * /ue_count 63 * cpu/cpu1/.. <L1 and L2 block directory> 64 * /L1-cache/ce_count 65 * /ue_count 66 * /L2-cache/ce_count 67 * /ue_count 68 * ... 69 * 70 * the L1 and L2 directories would be "edac_device_block's" 71 */ 72 73 struct edac_device_counter { 74 u32 ue_count; 75 u32 ce_count; 76 }; 77 78 /* forward reference */ 79 struct edac_device_ctl_info; 80 struct edac_device_block; 81 82 /* edac_dev_sysfs_attribute structure 83 * used for driver sysfs attributes in mem_ctl_info 84 * for extra controls and attributes: 85 * like high level error Injection controls 86 */ 87 struct edac_dev_sysfs_attribute { 88 struct attribute attr; 89 ssize_t (*show)(struct edac_device_ctl_info *, char *); 90 ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t); 91 }; 92 93 /* edac_dev_sysfs_block_attribute structure 94 * 95 * used in leaf 'block' nodes for adding controls/attributes 96 * 97 * each block in each instance of the containing control structure can 98 * have an array of the following. The show function will be filled in 99 * with the show function in the low level driver. 100 */ 101 struct edac_dev_sysfs_block_attribute { 102 struct attribute attr; 103 ssize_t (*show)(struct kobject *, struct attribute *, char *); 104 }; 105 106 /* device block control structure */ 107 struct edac_device_block { 108 struct edac_device_instance *instance; /* Up Pointer */ 109 char name[EDAC_DEVICE_NAME_LEN + 1]; 110 111 struct edac_device_counter counters; /* basic UE and CE counters */ 112 113 int nr_attribs; /* how many attributes */ 114 115 /* this block's attributes, could be NULL */ 116 struct edac_dev_sysfs_block_attribute *block_attributes; 117 118 /* edac sysfs device control */ 119 struct kobject kobj; 120 }; 121 122 /* device instance control structure */ 123 struct edac_device_instance { 124 struct edac_device_ctl_info *ctl; /* Up pointer */ 125 char name[EDAC_DEVICE_NAME_LEN + 4]; 126 127 struct edac_device_counter counters; /* instance counters */ 128 129 u32 nr_blocks; /* how many blocks */ 130 struct edac_device_block *blocks; /* block array */ 131 132 /* edac sysfs device control */ 133 struct kobject kobj; 134 }; 135 136 137 /* 138 * Abstract edac_device control info structure 139 * 140 */ 141 struct edac_device_ctl_info { 142 /* for global list of edac_device_ctl_info structs */ 143 struct list_head link; 144 145 struct module *owner; /* Module owner of this control struct */ 146 147 int dev_idx; 148 149 /* Per instance controls for this edac_device */ 150 int log_ue; /* boolean for logging UEs */ 151 int log_ce; /* boolean for logging CEs */ 152 int panic_on_ue; /* boolean for panic'ing on an UE */ 153 unsigned poll_msec; /* number of milliseconds to poll interval */ 154 unsigned long delay; /* number of jiffies for poll_msec */ 155 156 /* Additional top controller level attributes, but specified 157 * by the low level driver. 158 * 159 * Set by the low level driver to provide attributes at the 160 * controller level, same level as 'ue_count' and 'ce_count' above. 161 * An array of structures, NULL terminated 162 * 163 * If attributes are desired, then set to array of attributes 164 * If no attributes are desired, leave NULL 165 */ 166 struct edac_dev_sysfs_attribute *sysfs_attributes; 167 168 /* pointer to main 'edac' subsys in sysfs */ 169 const struct bus_type *edac_subsys; 170 171 /* the internal state of this controller instance */ 172 int op_state; 173 /* work struct for this instance */ 174 struct delayed_work work; 175 176 /* pointer to edac polling checking routine: 177 * If NOT NULL: points to polling check routine 178 * If NULL: Then assumes INTERRUPT operation, where 179 * MC driver will receive events 180 */ 181 void (*edac_check) (struct edac_device_ctl_info * edac_dev); 182 183 struct device *dev; /* pointer to device structure */ 184 185 const char *mod_name; /* module name */ 186 const char *ctl_name; /* edac controller name */ 187 const char *dev_name; /* pci/platform/etc... name */ 188 189 void *pvt_info; /* pointer to 'private driver' info */ 190 191 unsigned long start_time; /* edac_device load start time (jiffies) */ 192 193 /* sysfs top name under 'edac' directory 194 * and instance name: 195 * cpu/cpu0/... 196 * cpu/cpu1/... 197 * cpu/cpu2/... 198 * ... 199 */ 200 char name[EDAC_DEVICE_NAME_LEN + 1]; 201 202 /* Number of instances supported on this control structure 203 * and the array of those instances 204 */ 205 u32 nr_instances; 206 struct edac_device_instance *instances; 207 struct edac_device_block *blocks; 208 209 /* Event counters for the this whole EDAC Device */ 210 struct edac_device_counter counters; 211 212 /* edac sysfs device control for the 'name' 213 * device this structure controls 214 */ 215 struct kobject kobj; 216 }; 217 218 /* To get from the instance's wq to the beginning of the ctl structure */ 219 #define to_edac_mem_ctl_work(w) \ 220 container_of(w, struct mem_ctl_info, work) 221 222 #define to_edac_device_ctl_work(w) \ 223 container_of(w,struct edac_device_ctl_info,work) 224 225 /* 226 * The alloc() and free() functions for the 'edac_device' control info 227 * structure. A MC driver will allocate one of these for each edac_device 228 * it is going to control/register with the EDAC CORE. 229 */ 230 extern struct edac_device_ctl_info *edac_device_alloc_ctl_info( 231 unsigned sizeof_private, 232 char *edac_device_name, unsigned nr_instances, 233 char *edac_block_name, unsigned nr_blocks, 234 unsigned offset_value, 235 int device_index); 236 237 /* The offset value can be: 238 * -1 indicating no offset value 239 * 0 for zero-based block numbers 240 * 1 for 1-based block number 241 * other for other-based block number 242 */ 243 #define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1) 244 245 extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info); 246 247 /** 248 * edac_device_add_device - Insert the 'edac_dev' structure into the 249 * edac_device global list and create sysfs entries associated with 250 * edac_device structure. 251 * 252 * @edac_dev: pointer to edac_device structure to be added to the list 253 * 'edac_device' structure. 254 * 255 * Returns: 256 * 0 on Success, or an error code on failure 257 */ 258 extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev); 259 260 /** 261 * edac_device_del_device - Remove sysfs entries for specified edac_device 262 * structure and then remove edac_device structure from global list 263 * 264 * @dev: 265 * Pointer to struct &device representing the edac device 266 * structure to remove. 267 * 268 * Returns: 269 * Pointer to removed edac_device structure, 270 * or %NULL if device not found. 271 */ 272 extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev); 273 274 /** 275 * edac_device_handle_ce_count - Log correctable errors. 276 * 277 * @edac_dev: pointer to struct &edac_device_ctl_info 278 * @inst_nr: number of the instance where the CE error happened 279 * @count: Number of errors to log. 280 * @block_nr: number of the block where the CE error happened 281 * @msg: message to be printed 282 */ 283 void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev, 284 unsigned int count, int inst_nr, int block_nr, 285 const char *msg); 286 287 /** 288 * edac_device_handle_ue_count - Log uncorrectable errors. 289 * 290 * @edac_dev: pointer to struct &edac_device_ctl_info 291 * @inst_nr: number of the instance where the CE error happened 292 * @count: Number of errors to log. 293 * @block_nr: number of the block where the CE error happened 294 * @msg: message to be printed 295 */ 296 void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev, 297 unsigned int count, int inst_nr, int block_nr, 298 const char *msg); 299 300 /** 301 * edac_device_handle_ce(): Log a single correctable error 302 * 303 * @edac_dev: pointer to struct &edac_device_ctl_info 304 * @inst_nr: number of the instance where the CE error happened 305 * @block_nr: number of the block where the CE error happened 306 * @msg: message to be printed 307 */ 308 static inline void 309 edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr, 310 int block_nr, const char *msg) 311 { 312 edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg); 313 } 314 315 /** 316 * edac_device_handle_ue(): Log a single uncorrectable error 317 * 318 * @edac_dev: pointer to struct &edac_device_ctl_info 319 * @inst_nr: number of the instance where the UE error happened 320 * @block_nr: number of the block where the UE error happened 321 * @msg: message to be printed 322 */ 323 static inline void 324 edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr, 325 int block_nr, const char *msg) 326 { 327 edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg); 328 } 329 330 /** 331 * edac_device_alloc_index: Allocate a unique device index number 332 * 333 * Returns: 334 * allocated index number 335 */ 336 extern int edac_device_alloc_index(void); 337 extern const char *edac_layer_name[]; 338 339 /* Free the actual struct */ 340 static inline void __edac_device_free_ctl_info(struct edac_device_ctl_info *ci) 341 { 342 if (ci) { 343 kfree(ci->pvt_info); 344 kfree(ci->blocks); 345 kfree(ci->instances); 346 kfree(ci); 347 } 348 } 349 #endif 350