xref: /linux/drivers/hwmon/coretemp.c (revision 621cde16e49b3ecf7d59a8106a20aaebfb4a59a9)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * coretemp.c - Linux kernel module for hardware monitoring
4   *
5   * Copyright (C) 2007 Rudolf Marek <r.marek@assembler.cz>
6   *
7   * Inspired from many hwmon drivers
8   */
9  
10  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11  
12  #include <linux/module.h>
13  #include <linux/init.h>
14  #include <linux/slab.h>
15  #include <linux/jiffies.h>
16  #include <linux/hwmon.h>
17  #include <linux/sysfs.h>
18  #include <linux/hwmon-sysfs.h>
19  #include <linux/err.h>
20  #include <linux/mutex.h>
21  #include <linux/list.h>
22  #include <linux/platform_device.h>
23  #include <linux/cpu.h>
24  #include <linux/smp.h>
25  #include <linux/moduleparam.h>
26  #include <linux/pci.h>
27  #include <asm/msr.h>
28  #include <asm/processor.h>
29  #include <asm/cpu_device_id.h>
30  #include <linux/sched/isolation.h>
31  
32  #define DRVNAME	"coretemp"
33  
34  /*
35   * force_tjmax only matters when TjMax can't be read from the CPU itself.
36   * When set, it replaces the driver's suboptimal heuristic.
37   */
38  static int force_tjmax;
39  module_param_named(tjmax, force_tjmax, int, 0444);
40  MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
41  
/* Number of core slots allocated per package/die */
#define NUM_REAL_CORES		512
/* Size of the buffer holding one sysfs attribute name */
#define CORETEMP_NAME_LENGTH	28

/*
 * Index of each per-sensor sysfs attribute.
 *
 * The order must match the handler and suffix tables in
 * create_core_attrs(), which are indexed by these values.
 */
enum coretemp_attr_index {
	ATTR_LABEL,
	ATTR_CRIT_ALARM,
	ATTR_TEMP,
	ATTR_TJMAX,
	ATTR_TTARGET,
	/* Number of attributes every sensor gets (label .. tjmax) */
	MAX_CORE_ATTRS = ATTR_TJMAX + 1,
	/* Number of attributes including the optional ttarget one */
	TOTAL_ATTRS = ATTR_TTARGET + 1
};
54  
/*
 * Iterate @i over the thread siblings of @cpu. Without SMP the loop body
 * never runs.
 */
#ifndef CONFIG_SMP
#define for_each_sibling(i, cpu)	for (i = 0; false; )
#else
#define for_each_sibling(i, cpu) \
	for_each_cpu(i, topology_sibling_cpumask(cpu))
#endif
61  
62  /*
63   * Per-Core Temperature Data
64   * @tjmax: The static tjmax value when tjmax cannot be retrieved from
65   *		IA32_TEMPERATURE_TARGET MSR.
66   * @last_updated: The time when the current temperature value was updated
67   *		earlier (in jiffies).
68   * @cpu_core_id: The CPU Core from which temperature values should be read
69   *		This value is passed as "id" field to rdmsr/wrmsr functions.
70   * @status_reg: One of IA32_THERM_STATUS or IA32_PACKAGE_THERM_STATUS,
71   *		from where the temperature values should be read.
72   * @attr_size:  Total number of pre-core attrs displayed in the sysfs.
73   */
74  struct temp_data {
75  	int temp;
76  	int tjmax;
77  	unsigned long last_updated;
78  	unsigned int cpu;
79  	int index;
80  	u32 cpu_core_id;
81  	u32 status_reg;
82  	int attr_size;
83  	struct device_attribute sd_attrs[TOTAL_ATTRS];
84  	char attr_name[TOTAL_ATTRS][CORETEMP_NAME_LENGTH];
85  	struct attribute *attrs[TOTAL_ATTRS + 1];
86  	struct attribute_group attr_group;
87  	struct mutex update_lock;
88  };
89  
90  /* Platform Data per Physical CPU */
91  struct platform_data {
92  	struct device		*hwmon_dev;
93  	u16			pkg_id;
94  	int			nr_cores;
95  	struct ida		ida;
96  	struct cpumask		cpumask;
97  	struct temp_data	*pkg_data;
98  	struct temp_data	**core_data;
99  	struct device_attribute name_attr;
100  };
101  
/* TjMax (millidegrees Celsius) keyed by the Intel host bridge PCI device ID */
struct tjmax_pci {
	unsigned int device;
	int tjmax;
};

static const struct tjmax_pci tjmax_pci_table[] = {
	{ .device = 0x0708, .tjmax = 110000 },	/* CE41x0 (Sodaville ) */
	{ .device = 0x0c72, .tjmax = 102000 },	/* Atom S1240 (Centerton) */
	{ .device = 0x0c73, .tjmax = 95000 },	/* Atom S1220 (Centerton) */
	{ .device = 0x0c75, .tjmax = 95000 },	/* Atom S1260 (Centerton) */
};
113  
/*
 * TjMax (millidegrees Celsius) keyed by a substring of the CPU model
 * name string (cpuinfo_x86.x86_model_id).
 */
struct tjmax {
	char const *id;
	int tjmax;
};

static const struct tjmax tjmax_table[] = {
	/* Model 0x1c, stepping 2 */
	{ .id = "CPU  230", .tjmax = 100000 },
	/* Model 0x1c, stepping 2 */
	{ .id = "CPU  330", .tjmax = 125000 },
};
123  
124  struct tjmax_model {
125  	u8 model;
126  	u8 mask;
127  	int tjmax;
128  };
129  
130  #define ANY 0xff
131  
132  static const struct tjmax_model tjmax_model_table[] = {
133  	{ 0x1c, 10, 100000 },	/* D4xx, K4xx, N4xx, D5xx, K5xx, N5xx */
134  	{ 0x1c, ANY, 90000 },	/* Z5xx, N2xx, possibly others
135  				 * Note: Also matches 230 and 330,
136  				 * which are covered by tjmax_table
137  				 */
138  	{ 0x26, ANY, 90000 },	/* Atom Tunnel Creek (Exx), Lincroft (Z6xx)
139  				 * Note: TjMax for E6xxT is 110C, but CPU type
140  				 * is undetectable by software
141  				 */
142  	{ 0x27, ANY, 90000 },	/* Atom Medfield (Z2460) */
143  	{ 0x35, ANY, 90000 },	/* Atom Clover Trail/Cloverview (Z27x0) */
144  	{ 0x36, ANY, 100000 },	/* Atom Cedar Trail/Cedarview (N2xxx, D2xxx)
145  				 * Also matches S12x0 (stepping 9), covered by
146  				 * PCI table
147  				 */
148  };
149  
is_pkg_temp_data(struct temp_data * tdata)150  static bool is_pkg_temp_data(struct temp_data *tdata)
151  {
152  	return tdata->index < 0;
153  }
154  
/*
 * Guess TjMax for CPUs that do not report it.
 * @c:   cpuinfo of the CPU being probed
 * @id:  CPU number on which the MSRs are read
 * @dev: device used for log messages
 *
 * Explicit table entries take precedence: first the PCI host bridge ID,
 * then the model name string, then model/stepping. Only when none of them
 * matches is the MSR 0x17 / MSR 0xEE heuristic applied.
 *
 * Returns the TjMax estimate in millidegrees Celsius.
 */
static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
{
	/* The 100C is default for both mobile and non mobile CPUs */
	int tjmax = 100000;
	int tjmax_ee = 85000;
	bool use_msr_ee = true;
	struct pci_dev *bridge;
	u32 lo, hi;
	int idx;

	/* 1) PCI host bridge (domain 0, bus 0, devfn 0.0) */
	bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
	if (bridge && bridge->vendor == PCI_VENDOR_ID_INTEL) {
		for (idx = 0; idx < ARRAY_SIZE(tjmax_pci_table); idx++) {
			if (bridge->device != tjmax_pci_table[idx].device)
				continue;
			pci_dev_put(bridge);
			return tjmax_pci_table[idx].tjmax;
		}
	}
	pci_dev_put(bridge);

	/* 2) Model name string */
	for (idx = 0; idx < ARRAY_SIZE(tjmax_table); idx++) {
		if (strstr(c->x86_model_id, tjmax_table[idx].id))
			return tjmax_table[idx].tjmax;
	}

	/* 3) Model and stepping */
	for (idx = 0; idx < ARRAY_SIZE(tjmax_model_table); idx++) {
		const struct tjmax_model *entry = &tjmax_model_table[idx];

		if (c->x86_model != entry->model)
			continue;
		if (entry->mask == ANY || c->x86_stepping == entry->mask)
			return entry->tjmax;
	}

	/* Early chips have no MSR for TjMax */
	if (c->x86_model == 0xf && c->x86_stepping < 4)
		use_msr_ee = false;

	if (c->x86_model > 0xe && use_msr_ee) {
		/*
		 * Now we can detect the mobile CPU using Intel provided table
		 * http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
		 * For Core2 cores, check MSR 0x17, bit 28 1 = Mobile CPU
		 */
		if (rdmsr_safe_on_cpu(id, 0x17, &lo, &hi)) {
			dev_warn(dev,
				 "Unable to access MSR 0x17, assuming desktop"
				 " CPU\n");
			use_msr_ee = false;
		} else if (c->x86_model < 0x17 && !(lo & 0x10000000)) {
			/*
			 * Bit 28 is only trusted up to Penryn; no
			 * documentation for it was found by the original
			 * author.
			 */
			use_msr_ee = false;
		} else {
			/* Platform ID bits 52:50 (EDX starts at bit 32) */
			u8 platform_id = (hi >> 18) & 0x7;

			/*
			 * Mobile Penryn CPU seems to be platform ID 7 or 5
			 * (guesswork). For those: 90 degrees C if the MSR EE
			 * bit is set, otherwise 105 degrees C.
			 */
			if (c->x86_model == 0x17 &&
			    (platform_id == 5 || platform_id == 7)) {
				tjmax_ee = 90000;
				tjmax = 105000;
			}
		}
	}

	if (!use_msr_ee) {
		/*
		 * If we don't use msr EE it means we are desktop CPU
		 * (with exception of Atom)
		 */
		if (tjmax == 100000)
			dev_warn(dev, "Using relative temperature scale!\n");
		return tjmax;
	}

	if (rdmsr_safe_on_cpu(id, 0xee, &lo, &hi))
		dev_warn(dev,
			 "Unable to access MSR 0xEE, for Tjmax, left"
			 " at default\n");
	else if (lo & 0x40000000)
		tjmax = tjmax_ee;

	return tjmax;
}
260  
cpu_has_tjmax(struct cpuinfo_x86 * c)261  static bool cpu_has_tjmax(struct cpuinfo_x86 *c)
262  {
263  	u8 model = c->x86_model;
264  
265  	return model > 0xe &&
266  	       model != 0x1c &&
267  	       model != 0x26 &&
268  	       model != 0x27 &&
269  	       model != 0x35 &&
270  	       model != 0x36;
271  }
272  
get_tjmax(struct temp_data * tdata,struct device * dev)273  static int get_tjmax(struct temp_data *tdata, struct device *dev)
274  {
275  	struct cpuinfo_x86 *c = &cpu_data(tdata->cpu);
276  	int err;
277  	u32 eax, edx;
278  	u32 val;
279  
280  	/* use static tjmax once it is set */
281  	if (tdata->tjmax)
282  		return tdata->tjmax;
283  
284  	/*
285  	 * A new feature of current Intel(R) processors, the
286  	 * IA32_TEMPERATURE_TARGET contains the TjMax value
287  	 */
288  	err = rdmsr_safe_on_cpu(tdata->cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
289  	if (err) {
290  		if (cpu_has_tjmax(c))
291  			dev_warn(dev, "Unable to read TjMax from CPU %u\n", tdata->cpu);
292  	} else {
293  		val = (eax >> 16) & 0xff;
294  		if (val)
295  			return val * 1000;
296  	}
297  
298  	if (force_tjmax) {
299  		dev_notice(dev, "TjMax forced to %d degrees C by user\n",
300  			   force_tjmax);
301  		tdata->tjmax = force_tjmax * 1000;
302  	} else {
303  		/*
304  		 * An assumption is made for early CPUs and unreadable MSR.
305  		 * NOTE: the calculated value may not be correct.
306  		 */
307  		tdata->tjmax = adjust_tjmax(c, tdata->cpu, dev);
308  	}
309  	return tdata->tjmax;
310  }
311  
get_ttarget(struct temp_data * tdata,struct device * dev)312  static int get_ttarget(struct temp_data *tdata, struct device *dev)
313  {
314  	u32 eax, edx;
315  	int tjmax, ttarget_offset, ret;
316  
317  	/*
318  	 * ttarget is valid only if tjmax can be retrieved from
319  	 * MSR_IA32_TEMPERATURE_TARGET
320  	 */
321  	if (tdata->tjmax)
322  		return -ENODEV;
323  
324  	ret = rdmsr_safe_on_cpu(tdata->cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
325  	if (ret)
326  		return ret;
327  
328  	tjmax = (eax >> 16) & 0xff;
329  
330  	/* Read the still undocumented bits 8:15 of IA32_TEMPERATURE_TARGET. */
331  	ttarget_offset = (eax >> 8) & 0xff;
332  
333  	return (tjmax - ttarget_offset) * 1000;
334  }
335  
336  /* Keep track of how many zone pointers we allocated in init() */
337  static int max_zones __read_mostly;
338  /* Array of zone pointers. Serialized by cpu hotplug lock */
339  static struct platform_device **zone_devices;
340  
show_label(struct device * dev,struct device_attribute * devattr,char * buf)341  static ssize_t show_label(struct device *dev,
342  				struct device_attribute *devattr, char *buf)
343  {
344  	struct platform_data *pdata = dev_get_drvdata(dev);
345  	struct temp_data *tdata = container_of(devattr, struct temp_data, sd_attrs[ATTR_LABEL]);
346  
347  	if (is_pkg_temp_data(tdata))
348  		return sprintf(buf, "Package id %u\n", pdata->pkg_id);
349  
350  	return sprintf(buf, "Core %u\n", tdata->cpu_core_id);
351  }
352  
show_crit_alarm(struct device * dev,struct device_attribute * devattr,char * buf)353  static ssize_t show_crit_alarm(struct device *dev,
354  				struct device_attribute *devattr, char *buf)
355  {
356  	u32 eax, edx;
357  	struct temp_data *tdata = container_of(devattr, struct temp_data,
358  						sd_attrs[ATTR_CRIT_ALARM]);
359  
360  	mutex_lock(&tdata->update_lock);
361  	rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
362  	mutex_unlock(&tdata->update_lock);
363  
364  	return sprintf(buf, "%d\n", (eax >> 5) & 1);
365  }
366  
show_tjmax(struct device * dev,struct device_attribute * devattr,char * buf)367  static ssize_t show_tjmax(struct device *dev,
368  			struct device_attribute *devattr, char *buf)
369  {
370  	struct temp_data *tdata = container_of(devattr, struct temp_data, sd_attrs[ATTR_TJMAX]);
371  	int tjmax;
372  
373  	mutex_lock(&tdata->update_lock);
374  	tjmax = get_tjmax(tdata, dev);
375  	mutex_unlock(&tdata->update_lock);
376  
377  	return sprintf(buf, "%d\n", tjmax);
378  }
379  
show_ttarget(struct device * dev,struct device_attribute * devattr,char * buf)380  static ssize_t show_ttarget(struct device *dev,
381  				struct device_attribute *devattr, char *buf)
382  {
383  	struct temp_data *tdata = container_of(devattr, struct temp_data, sd_attrs[ATTR_TTARGET]);
384  	int ttarget;
385  
386  	mutex_lock(&tdata->update_lock);
387  	ttarget = get_ttarget(tdata, dev);
388  	mutex_unlock(&tdata->update_lock);
389  
390  	if (ttarget < 0)
391  		return ttarget;
392  	return sprintf(buf, "%d\n", ttarget);
393  }
394  
show_temp(struct device * dev,struct device_attribute * devattr,char * buf)395  static ssize_t show_temp(struct device *dev,
396  			struct device_attribute *devattr, char *buf)
397  {
398  	u32 eax, edx;
399  	struct temp_data *tdata = container_of(devattr, struct temp_data, sd_attrs[ATTR_TEMP]);
400  	int tjmax;
401  
402  	mutex_lock(&tdata->update_lock);
403  
404  	tjmax = get_tjmax(tdata, dev);
405  	/* Check whether the time interval has elapsed */
406  	if (time_after(jiffies, tdata->last_updated + HZ)) {
407  		rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
408  		/*
409  		 * Ignore the valid bit. In all observed cases the register
410  		 * value is either low or zero if the valid bit is 0.
411  		 * Return it instead of reporting an error which doesn't
412  		 * really help at all.
413  		 */
414  		tdata->temp = tjmax - ((eax >> 16) & 0xff) * 1000;
415  		tdata->last_updated = jiffies;
416  	}
417  
418  	mutex_unlock(&tdata->update_lock);
419  	return sprintf(buf, "%d\n", tdata->temp);
420  }
421  
create_core_attrs(struct temp_data * tdata,struct device * dev)422  static int create_core_attrs(struct temp_data *tdata, struct device *dev)
423  {
424  	int i;
425  	static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev,
426  			struct device_attribute *devattr, char *buf) = {
427  			show_label, show_crit_alarm, show_temp, show_tjmax,
428  			show_ttarget };
429  	static const char *const suffixes[TOTAL_ATTRS] = {
430  		"label", "crit_alarm", "input", "crit", "max"
431  	};
432  
433  	for (i = 0; i < tdata->attr_size; i++) {
434  		/*
435  		 * We map the attr number to core id of the CPU
436  		 * The attr number is always core id + 2
437  		 * The Pkgtemp will always show up as temp1_*, if available
438  		 */
439  		int attr_no = is_pkg_temp_data(tdata) ? 1 : tdata->cpu_core_id + 2;
440  
441  		snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH,
442  			 "temp%d_%s", attr_no, suffixes[i]);
443  		sysfs_attr_init(&tdata->sd_attrs[i].attr);
444  		tdata->sd_attrs[i].attr.name = tdata->attr_name[i];
445  		tdata->sd_attrs[i].attr.mode = 0444;
446  		tdata->sd_attrs[i].show = rd_ptr[i];
447  		tdata->attrs[i] = &tdata->sd_attrs[i].attr;
448  	}
449  	tdata->attr_group.attrs = tdata->attrs;
450  	return sysfs_create_group(&dev->kobj, &tdata->attr_group);
451  }
452  
453  
chk_ucode_version(unsigned int cpu)454  static int chk_ucode_version(unsigned int cpu)
455  {
456  	struct cpuinfo_x86 *c = &cpu_data(cpu);
457  
458  	/*
459  	 * Check if we have problem with errata AE18 of Core processors:
460  	 * Readings might stop update when processor visited too deep sleep,
461  	 * fixed for stepping D0 (6EC).
462  	 */
463  	if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) {
464  		pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n");
465  		return -ENODEV;
466  	}
467  	return 0;
468  }
469  
coretemp_get_pdev(unsigned int cpu)470  static struct platform_device *coretemp_get_pdev(unsigned int cpu)
471  {
472  	int id = topology_logical_die_id(cpu);
473  
474  	if (id >= 0 && id < max_zones)
475  		return zone_devices[id];
476  	return NULL;
477  }
478  
479  static struct temp_data *
init_temp_data(struct platform_data * pdata,unsigned int cpu,int pkg_flag)480  init_temp_data(struct platform_data *pdata, unsigned int cpu, int pkg_flag)
481  {
482  	struct temp_data *tdata;
483  
484  	if (!pdata->core_data) {
485  		/*
486  		 * TODO:
487  		 * The information of actual possible cores in a package is broken for now.
488  		 * Will replace hardcoded NUM_REAL_CORES with actual per package core count
489  		 * when this information becomes available.
490  		 */
491  		pdata->nr_cores = NUM_REAL_CORES;
492  		pdata->core_data = kcalloc(pdata->nr_cores, sizeof(struct temp_data *),
493  					   GFP_KERNEL);
494  		if (!pdata->core_data)
495  			return NULL;
496  	}
497  
498  	tdata = kzalloc(sizeof(struct temp_data), GFP_KERNEL);
499  	if (!tdata)
500  		return NULL;
501  
502  	if (pkg_flag) {
503  		pdata->pkg_data = tdata;
504  		/* Use tdata->index as indicator of package temp data */
505  		tdata->index = -1;
506  	} else {
507  		tdata->index = ida_alloc_max(&pdata->ida, pdata->nr_cores - 1, GFP_KERNEL);
508  		if (tdata->index < 0) {
509  			kfree(tdata);
510  			return NULL;
511  		}
512  		pdata->core_data[tdata->index] = tdata;
513  	}
514  
515  	tdata->status_reg = pkg_flag ? MSR_IA32_PACKAGE_THERM_STATUS :
516  							MSR_IA32_THERM_STATUS;
517  	tdata->cpu = cpu;
518  	tdata->cpu_core_id = topology_core_id(cpu);
519  	tdata->attr_size = MAX_CORE_ATTRS;
520  	mutex_init(&tdata->update_lock);
521  	return tdata;
522  }
523  
destroy_temp_data(struct platform_data * pdata,struct temp_data * tdata)524  static void destroy_temp_data(struct platform_data *pdata, struct temp_data *tdata)
525  {
526  	if (is_pkg_temp_data(tdata)) {
527  		pdata->pkg_data = NULL;
528  		kfree(pdata->core_data);
529  		pdata->core_data = NULL;
530  		pdata->nr_cores = 0;
531  	} else {
532  		pdata->core_data[tdata->index] = NULL;
533  		ida_free(&pdata->ida, tdata->index);
534  	}
535  	kfree(tdata);
536  }
537  
get_temp_data(struct platform_data * pdata,int cpu)538  static struct temp_data *get_temp_data(struct platform_data *pdata, int cpu)
539  {
540  	int i;
541  
542  	/* cpu < 0 means get pkg temp_data */
543  	if (cpu < 0)
544  		return pdata->pkg_data;
545  
546  	for (i = 0; i < pdata->nr_cores; i++) {
547  		if (pdata->core_data[i] &&
548  		    pdata->core_data[i]->cpu_core_id == topology_core_id(cpu))
549  			return pdata->core_data[i];
550  	}
551  	return NULL;
552  }
553  
create_core_data(struct platform_device * pdev,unsigned int cpu,int pkg_flag)554  static int create_core_data(struct platform_device *pdev, unsigned int cpu,
555  			    int pkg_flag)
556  {
557  	struct temp_data *tdata;
558  	struct platform_data *pdata = platform_get_drvdata(pdev);
559  	struct cpuinfo_x86 *c = &cpu_data(cpu);
560  	u32 eax, edx;
561  	int err;
562  
563  	if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
564  		return 0;
565  
566  	tdata = init_temp_data(pdata, cpu, pkg_flag);
567  	if (!tdata)
568  		return -ENOMEM;
569  
570  	/* Test if we can access the status register */
571  	err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
572  	if (err)
573  		goto err;
574  
575  	/* Make sure tdata->tjmax is a valid indicator for dynamic/static tjmax */
576  	get_tjmax(tdata, &pdev->dev);
577  
578  	/*
579  	 * The target temperature is available on older CPUs but not in the
580  	 * MSR_IA32_TEMPERATURE_TARGET register. Atoms don't have the register
581  	 * at all.
582  	 */
583  	if (c->x86_model > 0xe && c->x86_model != 0x1c)
584  		if (get_ttarget(tdata, &pdev->dev) >= 0)
585  			tdata->attr_size++;
586  
587  	/* Create sysfs interfaces */
588  	err = create_core_attrs(tdata, pdata->hwmon_dev);
589  	if (err)
590  		goto err;
591  
592  	return 0;
593  
594  err:
595  	destroy_temp_data(pdata, tdata);
596  	return err;
597  }
598  
599  static void
coretemp_add_core(struct platform_device * pdev,unsigned int cpu,int pkg_flag)600  coretemp_add_core(struct platform_device *pdev, unsigned int cpu, int pkg_flag)
601  {
602  	if (create_core_data(pdev, cpu, pkg_flag))
603  		dev_err(&pdev->dev, "Adding Core %u failed\n", cpu);
604  }
605  
coretemp_remove_core(struct platform_data * pdata,struct temp_data * tdata)606  static void coretemp_remove_core(struct platform_data *pdata, struct temp_data *tdata)
607  {
608  	/* if we errored on add then this is already gone */
609  	if (!tdata)
610  		return;
611  
612  	/* Remove the sysfs attributes */
613  	sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);
614  
615  	destroy_temp_data(pdata, tdata);
616  }
617  
coretemp_device_add(int zoneid)618  static int coretemp_device_add(int zoneid)
619  {
620  	struct platform_device *pdev;
621  	struct platform_data *pdata;
622  	int err;
623  
624  	/* Initialize the per-zone data structures */
625  	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
626  	if (!pdata)
627  		return -ENOMEM;
628  
629  	pdata->pkg_id = zoneid;
630  	ida_init(&pdata->ida);
631  
632  	pdev = platform_device_alloc(DRVNAME, zoneid);
633  	if (!pdev) {
634  		err = -ENOMEM;
635  		goto err_free_pdata;
636  	}
637  
638  	err = platform_device_add(pdev);
639  	if (err)
640  		goto err_put_dev;
641  
642  	platform_set_drvdata(pdev, pdata);
643  	zone_devices[zoneid] = pdev;
644  	return 0;
645  
646  err_put_dev:
647  	platform_device_put(pdev);
648  err_free_pdata:
649  	kfree(pdata);
650  	return err;
651  }
652  
coretemp_device_remove(int zoneid)653  static void coretemp_device_remove(int zoneid)
654  {
655  	struct platform_device *pdev = zone_devices[zoneid];
656  	struct platform_data *pdata = platform_get_drvdata(pdev);
657  
658  	ida_destroy(&pdata->ida);
659  	kfree(pdata);
660  	platform_device_unregister(pdev);
661  }
662  
coretemp_cpu_online(unsigned int cpu)663  static int coretemp_cpu_online(unsigned int cpu)
664  {
665  	struct platform_device *pdev = coretemp_get_pdev(cpu);
666  	struct cpuinfo_x86 *c = &cpu_data(cpu);
667  	struct platform_data *pdata;
668  
669  	/*
670  	 * Don't execute this on resume as the offline callback did
671  	 * not get executed on suspend.
672  	 */
673  	if (cpuhp_tasks_frozen)
674  		return 0;
675  
676  	/*
677  	 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
678  	 * sensors. We check this bit only, all the early CPUs
679  	 * without thermal sensors will be filtered out.
680  	 */
681  	if (!cpu_has(c, X86_FEATURE_DTHERM))
682  		return -ENODEV;
683  
684  	pdata = platform_get_drvdata(pdev);
685  	if (!pdata->hwmon_dev) {
686  		struct device *hwmon;
687  
688  		/* Check the microcode version of the CPU */
689  		if (chk_ucode_version(cpu))
690  			return -EINVAL;
691  
692  		/*
693  		 * Alright, we have DTS support.
694  		 * We are bringing the _first_ core in this pkg
695  		 * online. So, initialize per-pkg data structures and
696  		 * then bring this core online.
697  		 */
698  		hwmon = hwmon_device_register_with_groups(&pdev->dev, DRVNAME,
699  							  pdata, NULL);
700  		if (IS_ERR(hwmon))
701  			return PTR_ERR(hwmon);
702  		pdata->hwmon_dev = hwmon;
703  
704  		/*
705  		 * Check whether pkgtemp support is available.
706  		 * If so, add interfaces for pkgtemp.
707  		 */
708  		if (cpu_has(c, X86_FEATURE_PTS))
709  			coretemp_add_core(pdev, cpu, 1);
710  	}
711  
712  	/*
713  	 * Check whether a thread sibling is already online. If not add the
714  	 * interface for this CPU core.
715  	 */
716  	if (!cpumask_intersects(&pdata->cpumask, topology_sibling_cpumask(cpu)))
717  		coretemp_add_core(pdev, cpu, 0);
718  
719  	cpumask_set_cpu(cpu, &pdata->cpumask);
720  	return 0;
721  }
722  
coretemp_cpu_offline(unsigned int cpu)723  static int coretemp_cpu_offline(unsigned int cpu)
724  {
725  	struct platform_device *pdev = coretemp_get_pdev(cpu);
726  	struct platform_data *pd;
727  	struct temp_data *tdata;
728  	int target;
729  
730  	/* No need to tear down any interfaces for suspend */
731  	if (cpuhp_tasks_frozen)
732  		return 0;
733  
734  	/* If the physical CPU device does not exist, just return */
735  	pd = platform_get_drvdata(pdev);
736  	if (!pd->hwmon_dev)
737  		return 0;
738  
739  	tdata = get_temp_data(pd, cpu);
740  
741  	cpumask_clear_cpu(cpu, &pd->cpumask);
742  
743  	/*
744  	 * If this is the last thread sibling, remove the CPU core
745  	 * interface, If there is still a sibling online, transfer the
746  	 * target cpu of that core interface to it.
747  	 */
748  	target = cpumask_any_and(&pd->cpumask, topology_sibling_cpumask(cpu));
749  	if (target >= nr_cpu_ids) {
750  		coretemp_remove_core(pd, tdata);
751  	} else if (tdata && tdata->cpu == cpu) {
752  		mutex_lock(&tdata->update_lock);
753  		tdata->cpu = target;
754  		mutex_unlock(&tdata->update_lock);
755  	}
756  
757  	/*
758  	 * If all cores in this pkg are offline, remove the interface.
759  	 */
760  	tdata = get_temp_data(pd, -1);
761  	if (cpumask_empty(&pd->cpumask)) {
762  		if (tdata)
763  			coretemp_remove_core(pd, tdata);
764  		hwmon_device_unregister(pd->hwmon_dev);
765  		pd->hwmon_dev = NULL;
766  		return 0;
767  	}
768  
769  	/*
770  	 * Check whether this core is the target for the package
771  	 * interface. We need to assign it to some other cpu.
772  	 */
773  	if (tdata && tdata->cpu == cpu) {
774  		target = cpumask_first(&pd->cpumask);
775  		mutex_lock(&tdata->update_lock);
776  		tdata->cpu = target;
777  		mutex_unlock(&tdata->update_lock);
778  	}
779  	return 0;
780  }
781  static const struct x86_cpu_id __initconst coretemp_ids[] = {
782  	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_DTHERM, NULL),
783  	{}
784  };
785  MODULE_DEVICE_TABLE(x86cpu, coretemp_ids);
786  
787  static enum cpuhp_state coretemp_hp_online;
788  
coretemp_init(void)789  static int __init coretemp_init(void)
790  {
791  	int i, err;
792  
793  	/*
794  	 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
795  	 * sensors. We check this bit only, all the early CPUs
796  	 * without thermal sensors will be filtered out.
797  	 */
798  	if (!x86_match_cpu(coretemp_ids))
799  		return -ENODEV;
800  
801  	max_zones = topology_max_packages() * topology_max_dies_per_package();
802  	zone_devices = kcalloc(max_zones, sizeof(struct platform_device *),
803  			      GFP_KERNEL);
804  	if (!zone_devices)
805  		return -ENOMEM;
806  
807  	for (i = 0; i < max_zones; i++) {
808  		err = coretemp_device_add(i);
809  		if (err)
810  			goto outzone;
811  	}
812  
813  	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/coretemp:online",
814  				coretemp_cpu_online, coretemp_cpu_offline);
815  	if (err < 0)
816  		goto outzone;
817  	coretemp_hp_online = err;
818  	return 0;
819  
820  outzone:
821  	while (i--)
822  		coretemp_device_remove(i);
823  	kfree(zone_devices);
824  	return err;
825  }
module_init(coretemp_init)826  module_init(coretemp_init)
827  
828  static void __exit coretemp_exit(void)
829  {
830  	int i;
831  
832  	cpuhp_remove_state(coretemp_hp_online);
833  	for (i = 0; i < max_zones; i++)
834  		coretemp_device_remove(i);
835  	kfree(zone_devices);
836  }
837  module_exit(coretemp_exit)
838  
839  MODULE_AUTHOR("Rudolf Marek <r.marek@assembler.cz>");
840  MODULE_DESCRIPTION("Intel Core temperature monitor");
841  MODULE_LICENSE("GPL");
842