mirror of https://github.com/PCSX2/pcsx2.git
1677 lines
64 KiB
C
1677 lines
64 KiB
C
#include <stdint.h>
|
||
|
||
#include <cpuinfo.h>
|
||
#include <cpuinfo/internal-api.h>
|
||
#include <cpuinfo/log.h>
|
||
#include <arm/api.h>
|
||
#include <arm/midr.h>
|
||
|
||
|
||
void cpuinfo_arm_decode_cache(
|
||
enum cpuinfo_uarch uarch,
|
||
uint32_t cluster_cores,
|
||
uint32_t midr,
|
||
const struct cpuinfo_arm_chipset chipset[restrict static 1],
|
||
uint32_t cluster_id,
|
||
uint32_t arch_version,
|
||
struct cpuinfo_cache l1i[restrict static 1],
|
||
struct cpuinfo_cache l1d[restrict static 1],
|
||
struct cpuinfo_cache l2[restrict static 1],
|
||
struct cpuinfo_cache l3[restrict static 1])
|
||
{
|
||
switch (uarch) {
|
||
#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_7A__) && !defined(__ARM_ARCH_8A__)
|
||
case cpuinfo_uarch_xscale:
|
||
switch (midr_get_part(midr) >> 8) {
|
||
case 2:
|
||
/*
|
||
* PXA 210/25X/26X
|
||
*
|
||
* See "Computer Organization and Design, Revised Printing: The Hardware/Software Interface"
|
||
* by David A. Patterson, John L. Hennessy
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024,
|
||
.associativity = 32,
|
||
.line_size = 32
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
break;
|
||
case 4:
|
||
/* PXA 27X */
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 32,
|
||
.line_size = 32
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 32,
|
||
.line_size = 32
|
||
};
|
||
break;
|
||
case 6:
|
||
/*
|
||
* PXA 3XX
|
||
*
|
||
* See http://download.intel.com/design/intelxscale/31628302.pdf
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 256 * 1024,
|
||
.associativity = 8,
|
||
.line_size = 32
|
||
};
|
||
break;
|
||
}
|
||
break;
|
||
case cpuinfo_uarch_arm11:
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
break;
|
||
#endif /* CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_7A__) && !defined(__ARM_ARCH_8A__) */
|
||
#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__)
|
||
case cpuinfo_uarch_cortex_a5:
|
||
/*
|
||
* Cortex-A5 Technical Reference Manual:
|
||
* 7.1.1. Memory system
|
||
* The Cortex-A5 processor has separate instruction and data caches.
|
||
* The caches have the following features:
|
||
* - Data cache is 4-way set-associative.
|
||
* - Instruction cache is 2-way set-associative.
|
||
* - The cache line length is eight words.
|
||
* - You can configure the instruction and data caches independently during implementation
|
||
* to sizes of 4KB, 8KB, 16KB, 32KB, or 64KB.
|
||
* 1.1.3. System design components
|
||
* PrimeCell Level 2 Cache Controller (PL310)
|
||
* The addition of an on-chip secondary cache, also referred to as a Level 2 or L2 cache, is a
|
||
* recognized method of improving the performance of ARM-based systems when significant memory traffic
|
||
* is generated by the processor. The PrimeCell Level 2 Cache Controller reduces the number of external
|
||
* memory accesses and has been optimized for use with the Cortex-A5 processor.
|
||
* 8.1.7. Exclusive L2 cache
|
||
* The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode.
|
||
* This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
|
||
*
|
||
* +--------------------+-----------+-----------+----------+-----------+
|
||
* | Processor model | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +--------------------+-----------+-----------+----------+-----------+
|
||
* | Qualcomm MSM7225A | | | | |
|
||
* | Qualcomm MSM7625A | | | | |
|
||
* | Qualcomm MSM7227A | | | | |
|
||
* | Qualcomm MSM7627A | 32K | 32K | 256K | Wiki [1] |
|
||
* | Qualcomm MSM7225AB | | | | |
|
||
* | Qualcomm MSM7625AB | | | | |
|
||
* | Qualcomm QSD8250 | | | | |
|
||
* | Qualcomm QSD8650 | | | | |
|
||
* +--------------------+-----------+-----------+----------+-----------+
|
||
* | Spreadtrum SC6821 | 32K | 32K | ? | |
|
||
* | Spreadtrum SC6825 | 32K | 32K | 256K | Wiki [2] |
|
||
* | Spreadtrum SC8810 | ? | ? | ? | |
|
||
* | Spreadtrum SC8825 | 32K | 32K | ? | |
|
||
* +--------------------+-----------+-----------+----------+-----------+
|
||
*
|
||
* [1] https://en.wikipedia.org/wiki/List_of_Qualcomm_Snapdragon_systems-on-chip#Snapdragon_S1
|
||
* [2] https://en.wikipedia.org/wiki/Spreadtrum
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 2,
|
||
.line_size = 32
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 256 * 1024,
|
||
/*
|
||
* Follow NXP specification: "Eight-way set-associative 512 kB L2 cache with 32B line size"
|
||
* Reference: http://www.nxp.com/assets/documents/data/en/application-notes/AN4947.pdf
|
||
*/
|
||
.associativity = 8,
|
||
.line_size = 32
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_cortex_a7:
|
||
/*
|
||
* Cortex-A7 MPCore Technical Reference Manual:
|
||
* 6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches. You can configure the
|
||
* instruction and data caches independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB.
|
||
*
|
||
* The L1 instruction memory system has the following features:
|
||
* - Instruction side cache line length of 32-bytes.
|
||
* - 2-way set-associative instruction cache.
|
||
*
|
||
* The L1 data memory system has the following features:
|
||
* - Data side cache line length of 64-bytes.
|
||
* - 4-way set-associative data cache.
|
||
*
|
||
* 7.1. About the L2 Memory system
|
||
* The L2 memory system consists of an:
|
||
* - Optional tightly-coupled L2 cache that includes:
|
||
* - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
|
||
* - Fixed line length of 64 bytes
|
||
* - 8-way set-associative cache structure
|
||
*
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Allwinner A20 | 2 | 32K | 32K | 256K | [1] |
|
||
* | Allwinner A23 | 2 | 32K | 32K | 256K | [2] |
|
||
* | Allwinner A31 | 4 | 32K | 32K | 1M | [3] |
|
||
* | Allwinner A31s | 4 | 32K | 32K | 1M | [4] |
|
||
* | Allwinner A33 | 4 | 32K | 32K | 512K | [5] |
|
||
* | Allwinner A80 Octa | 4(+4) | 32K | 32K | 512K(+2M) | [6] |
|
||
* | Allwinner A83T | 8 | 32K | 32K | 1M | [7] |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Broadcom BCM2836 | 4 | 32K | 32K | 512K | [8] |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Kirin 920 | 4(+4) | ? | ? | 512K | [9] |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] https://linux-sunxi.org/A20
|
||
* [2] https://linux-sunxi.org/A23
|
||
* [3] http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
|
||
* [4] https://github.com/allwinner-zh/documents/blob/master/A31s/A31s_Datasheet_v1.5_20150510.pdf
|
||
* [5] http://dl.linux-sunxi.org/A33/A33_Datasheet_release1.0.pdf
|
||
* [6] https://linux-sunxi.org/images/1/10/A80_Datasheet_Revision_1.0_0404.pdf
|
||
* [7] http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf
|
||
* [8] https://www.raspberrypi.org/forums/viewtopic.php?t=98428
|
||
* [9] http://www.gizmochina.com/2014/10/07/hisilicon-kirin-920-tear-down/
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 2,
|
||
.line_size = 32
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 128 * 1024 * cluster_cores,
|
||
.associativity = 8,
|
||
.line_size = 64
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_cortex_a8:
|
||
/*
|
||
* Cortex-A8 Technical Reference Manual:
|
||
* 7.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches in a Harvard arrangement.
|
||
* The L1 memory system provides the core with:
|
||
* - fixed line length of 64 bytes
|
||
* - support for 16KB or 32KB caches
|
||
* - 4-way set associative cache structure
|
||
* 8.1. About the L2 memory system
|
||
* The L2 memory system is tightly coupled to the L1 data cache and L1 instruction cache.
|
||
* The key features of the L2 memory system include:
|
||
* - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
|
||
* - fixed line length of 64 bytes
|
||
* - 8-way set associative cache structure
|
||
*
|
||
* +----------------------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +----------------------+-----------+-----------+-----------+-----------+
|
||
* | Exynos 3 Single 3110 | 32K | 32K | 512K | [1] |
|
||
* +----------------------+-----------+-----------+-----------+-----------+
|
||
* | TI DM 3730 | 32K | 32K | 256K | [2] |
|
||
* +----------------------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] https://en.wikichip.org/w/images/0/04/Exynos_3110.pdf
|
||
* [2] https://www.ti.com/lit/ds/symlink/dm3725.pdf
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.associativity = 8,
|
||
.line_size = 64
|
||
};
|
||
switch (chipset->vendor) {
|
||
case cpuinfo_arm_chipset_vendor_samsung:
|
||
l2->size = 512 * 1024;
|
||
break;
|
||
default:
|
||
l2->size = 256 * 1024;
|
||
break;
|
||
}
|
||
|
||
break;
|
||
case cpuinfo_uarch_cortex_a9:
|
||
/*
|
||
* ARM Cortex‑A9 Technical Reference Manual:
|
||
* 7.1.1 Memory system
|
||
* The Cortex‑A9 processor has separate instruction and data caches.
|
||
* The caches have the following features:
|
||
* - Both caches are 4-way set-associative.
|
||
* - The cache line length is eight words.
|
||
* - You can configure the instruction and data caches independently during implementation
|
||
* to sizes of 16KB, 32KB, or 64KB.
|
||
* 8.1.5 Exclusive L2 cache
|
||
* The Cortex‑A9 processor can be connected to an L2 cache that supports an exclusive cache mode.
|
||
* This mode must be activated both in the Cortex‑A9 processor and in the L2 cache controller.
|
||
*
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Exynos 4 Dual 4210 | 2 | 32K | 32K | 1M | [1] |
|
||
* | Exynos 4 Dual 4212 | 2 | 32K | 32K | 1M | [2] |
|
||
* | Exynos 4 Quad 4412 | 4 | 32K | 32K | 1M | [3] |
|
||
* | Exynos 4 Quad 4415 | 4 | 32K | 32K | 1M | |
|
||
* | TI OMAP 4430 | 2 | 32K | 32K | 1M | [4] |
|
||
* | TI OMAP 4460 | 2 | 32K | 32K | 1M | [5] |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_45nm_User_Manaul_Public_REV1.00-0.pdf
|
||
* [2] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_32nm_User_Manaul_Public_REV100-0.pdf
|
||
* [3] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Quad_User_Manaul_Public_REV1.00-0.pdf
|
||
* [4] https://www.hotchips.org/wp-content/uploads/hc_archives/hc21/2_mon/HC21.24.400.ClientProcessors-Epub/HC21.24.421.Witt-OMAP4430.pdf
|
||
* [5] http://www.anandtech.com/show/5310/samsung-galaxy-nexus-ice-cream-sandwich-review/9
|
||
*/
|
||
|
||
/* Use Exynos 4 specs */
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 1024 * 1024,
|
||
/* OMAP4460 in Pandaboard ES has 16-way set-associative L2 cache */
|
||
.associativity = 16,
|
||
.line_size = 32
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_cortex_a15:
|
||
/*
|
||
* 6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches.
|
||
* The L1 instruction memory system has the following features:
|
||
* - 32KB 2-way set-associative instruction cache.
|
||
* - Fixed line length of 64 bytes.
|
||
* The L1 data memory system has the following features:
|
||
* - 32KB 2-way set-associative data cache.
|
||
* - Fixed line length of 64 bytes.
|
||
* 7.1. About the L2 memory system
|
||
* The features of the L2 memory system include:
|
||
* - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
|
||
* - Fixed line length of 64 bytes.
|
||
* - 16-way set-associative cache structure.
|
||
*
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Exynos 5 Dual 5250 | 2 | 32K | 32K | 1M | [1] |
|
||
* | Exynos 5 Hexa 5260 | 2(+4) | 32K | 32K | 1M(+512K) | [2] |
|
||
* | Exynos 5 Octa 5410 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
|
||
* | Exynos 5 Octa 5420 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
|
||
* | Exynos 5 Octa 5422 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
|
||
* | Exynos 5 Octa 5430 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
|
||
* | Exynos 5 Octa 5800 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
|
||
* | Kirin 920 | 4(+4) | ? | ? | 2M(+512K) | [4] |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] http://www.arndaleboard.org/wiki/downloads/supports/Exynos_5_Dual_User_Manaul_Public_REV1.00.pdf
|
||
* [2] http://www.yicsystem.com/wp-content/uploads/2014/08/Espresso5260P-Guide-Book.pdf
|
||
* [3] http://www.anandtech.com/show/6768/samsung-details-exynos-5-octa-architecture-power-at-isscc-13
|
||
* [4] http://www.gizmochina.com/2014/10/07/hisilicon-kirin-920-tear-down/
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 2,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 2,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = cluster_cores * 512 * 1024,
|
||
.associativity = 16,
|
||
.line_size = 64
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_cortex_a17:
|
||
/*
|
||
* ARM Cortex-A17 MPCore Processor Technical Reference Manual:
|
||
* 6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches.
|
||
* The size of the instruction cache is implemented as either 32KB or 64KB.
|
||
* The size of the data cache is 32KB.
|
||
*
|
||
* The L1 instruction cache has the following features:
|
||
* - Instruction side cache line length of 64-bytes.
|
||
* - 4-way set-associative instruction cache.
|
||
*
|
||
* The L1 data cache has the following features:
|
||
* - Data side cache line length of 64-bytes.
|
||
* - 4-way set-associative data cache.
|
||
*
|
||
* 7.1. About the L2 Memory system
|
||
* An integrated L2 cache:
|
||
* - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB.
|
||
* - A fixed line length of 64 bytes.
|
||
* - 16-way set-associative cache structure.
|
||
*
|
||
* +------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | MediaTek MT6595 | 4(+4) | 32K | 32K | 2M(+512K) | [1] |
|
||
* +------------------+-------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] https://blog.osakana.net/archives/5268
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = cluster_cores * 512 * 1024,
|
||
.associativity = 16,
|
||
.line_size = 64
|
||
};
|
||
break;
|
||
#endif /* CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) */
|
||
case cpuinfo_uarch_cortex_a35:
|
||
/*
|
||
* ARM Cortex‑A35 Processor Technical Reference Manual:
|
||
* 6.1. About the L1 memory system
|
||
* The L1 memory system includes several power-saving and performance-enhancing features.
|
||
* These include separate instruction and data caches, which can be configured
|
||
* independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB.
|
||
*
|
||
* L1 instruction-side memory system
|
||
* A dedicated instruction cache that:
|
||
* - is virtually indexed and physically tagged.
|
||
* - is 2-way set associative.
|
||
* - is configurable to be 8KB, 16KB, 32KB, or 64KB.
|
||
* - uses a cache line length of 64 bytes.
|
||
*
|
||
* L1 data-side memory system
|
||
* A dedicated data cache that:
|
||
* - is physically indexed and physically tagged.
|
||
* - is 4-way set associative.
|
||
* - is configurable to be 8KB, 16KB, 32KB, or 64KB.
|
||
* - uses a cache line length of 64 bytes.
|
||
*
|
||
* 7.1. About the L2 memory system
|
||
* The L2 cache is 8-way set associative.
|
||
* Further features of the L2 cache are:
|
||
* - Configurable size of 128KB, 256KB, 512KB, and 1MB.
|
||
* - Fixed line length of 64 bytes.
|
||
* - Physically indexed and tagged.
|
||
*
|
||
* +-----------------+---------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +-----------------+---------+-----------+-----------+-----------+-----------+
|
||
* | MediaTek MT6599 | 4(+4+2) | ? | ? | ? | |
|
||
* +-----------------+---------+-----------+-----------+-----------+-----------+
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024, /* assumption based on low-end Cortex-A53 */
|
||
.associativity = 2,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024, /* assumption based on low-end Cortex-A53 */
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 256 * 1024, /* assumption based on low-end Cortex-A53 */
|
||
.associativity = 8,
|
||
.line_size = 64
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_cortex_a53:
|
||
/*
|
||
* ARM Cortex-A53 MPCore Processor Technical Reference Manual:
|
||
* 6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches. The implementer configures the
|
||
* instruction and data caches independently during implementation, to sizes of 8KB, 16KB, 32KB, or 64KB.
|
||
*
|
||
* The L1 Instruction memory system has the following key features:
|
||
* - Instruction side cache line length of 64 bytes.
|
||
* - 2-way set associative L1 Instruction cache.
|
||
*
|
||
* The L1 Data memory system has the following features:
|
||
* - Data side cache line length of 64 bytes.
|
||
* - 4-way set associative L1 Data cache.
|
||
*
|
||
* 7.1. About the L2 memory system
|
||
* The L2 memory system consists of an:
|
||
* - Optional tightly-coupled L2 cache that includes:
|
||
* - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
|
||
* - Fixed line length of 64 bytes.
|
||
* - 16-way set-associative cache structure.
|
||
*
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Broadcom BCM2837 | 4 | 16K | 16K | 512K | [1] |
|
||
* | Exynos 7420 | 4(+4) | 32K | 32K | 256K | [2, 3] |
|
||
* | Exynos 8890 | 4(+4) | 32K | 32K | 256K | [4] |
|
||
* | Rockchip RK3368 | 4+4 | 32K | 32K | 512K+256K | sysfs |
|
||
* | MediaTek MT8173C | 2(+2) | 32K | 32K | 512K(+1M) | sysfs |
|
||
* | Snapdragon 410 | 4 | 32K | 32K | 512K | [3] |
|
||
* | Snapdragon 630 | 4+4 | 32K | 32K | 1M+512K | sysfs |
|
||
* | Snapdragon 636 | 4(+4) | 32K+64K | 32K+64K | 1M+1M | sysfs |
|
||
* | Snapdragon 660 | 4(+4) | 32K+64K | 32K+64K | 1M+1M | sysfs |
|
||
* | Snapdragon 835 | 4(+4) | 32K+64K | 32K+64K | 1M(+2M) | sysfs |
|
||
* | Kirin 620 | 4+4 | 32K | 32K | 512K | [5] |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] https://www.raspberrypi.org/forums/viewtopic.php?f=91&t=145766
|
||
* [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2
|
||
* [3] https://www.usenix.org/system/files/conference/usenixsecurity16/sec16_paper_lipp.pdf
|
||
* [4] http://www.boardset.com/products/products_v8890.php
|
||
* [5] http://mirror.lemaker.org/Hi6220V100_Multi-Mode_Application_Processor_Function_Description.pdf
|
||
*/
|
||
if (midr_is_qualcomm_cortex_a53_silver(midr)) {
|
||
/* Qualcomm-modified Cortex-A53 in Snapdragon 630/660/835 */
|
||
|
||
uint32_t l2_size = 512 * 1024;
|
||
switch (chipset->series) {
|
||
case cpuinfo_arm_chipset_series_qualcomm_msm:
|
||
if (chipset->model == 8998) {
|
||
/* Snapdragon 835 (MSM8998): 1 MB L2 (little cores only) */
|
||
l2_size = 1024 * 1024;
|
||
}
|
||
break;
|
||
case cpuinfo_arm_chipset_series_qualcomm_snapdragon:
|
||
switch (chipset->model) {
|
||
case 630:
|
||
if (cluster_id == 0) {
|
||
/* Snapdragon 630: 1 MB L2 for the big cores */
|
||
l2_size = 1024 * 1024;
|
||
}
|
||
break;
|
||
case 636:
|
||
/* Snapdragon 636: 1 MB L2 (little cores only) */
|
||
l2_size = 1024 * 1024;
|
||
break;
|
||
case 660:
|
||
/* Snapdragon 660: 1 MB L2 (little cores only) */
|
||
l2_size = 1024 * 1024;
|
||
break;
|
||
}
|
||
break;
|
||
default:
|
||
break;
|
||
}
|
||
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 2,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = l2_size,
|
||
.associativity = 16,
|
||
.line_size = 64
|
||
};
|
||
} else {
|
||
/* Standard Cortex-A53 */
|
||
|
||
/* Use conservative values by default */
|
||
uint32_t l1_size = 16 * 1024;
|
||
uint32_t l2_size = 256 * 1024;
|
||
switch (chipset->series) {
|
||
case cpuinfo_arm_chipset_series_qualcomm_msm:
|
||
l1_size = 32 * 1024;
|
||
l2_size = 512 * 1024;
|
||
switch (chipset->model) {
|
||
case 8937: /* Snapdragon 430 */
|
||
case 8940: /* Snapdragon 435 */
|
||
case 8953: /* Snapdragon 625 or 626 (8953PRO) */
|
||
if (cluster_id == 0) {
|
||
/* 1M L2 for big cluster */
|
||
l2_size = 1024 * 1024;
|
||
}
|
||
break;
|
||
case 8952: /* Snapdragon 617 */
|
||
if (cluster_id != 0) {
|
||
/* 256K L2 for LITTLE cluster */
|
||
l2_size = 256 * 1024;
|
||
}
|
||
break;
|
||
default:
|
||
/* Silence compiler warning about unhandled enum values */
|
||
break;
|
||
}
|
||
break;
|
||
case cpuinfo_arm_chipset_series_qualcomm_apq:
|
||
l1_size = 32 * 1024;
|
||
l2_size = 512 * 1024;
|
||
break;
|
||
case cpuinfo_arm_chipset_series_qualcomm_snapdragon:
|
||
l1_size = 32 * 1024;
|
||
l2_size = 512 * 1024;
|
||
if (chipset->model == 450 && cluster_id == 0) {
|
||
/* Snapdragon 450: 1M L2 for big cluster */
|
||
l2_size = 1024 * 1024;
|
||
}
|
||
break;
|
||
case cpuinfo_arm_chipset_series_hisilicon_hi:
|
||
l1_size = 32 * 1024;
|
||
l2_size = 512 * 1024;
|
||
break;
|
||
case cpuinfo_arm_chipset_series_hisilicon_kirin:
|
||
l1_size = 32 * 1024;
|
||
switch (chipset->model) {
|
||
case 970: /* Kirin 970 */
|
||
l2_size = 1024 * 1024;
|
||
break;
|
||
default:
|
||
l2_size = 512 * 1024;
|
||
break;
|
||
}
|
||
break;
|
||
case cpuinfo_arm_chipset_series_mediatek_mt:
|
||
switch (chipset->model) {
|
||
case 8173:
|
||
l1_size = 32 * 1024;
|
||
l2_size = 512 * 1024;
|
||
break;
|
||
}
|
||
break;
|
||
case cpuinfo_arm_chipset_series_rockchip_rk:
|
||
l1_size = 32 * 1024;
|
||
switch (chipset->model) {
|
||
case 3368:
|
||
if (cluster_id == 0) {
|
||
/* RK3368: 512 KB L2 for the big cores */
|
||
l2_size = 512 * 1024;
|
||
}
|
||
break;
|
||
}
|
||
break;
|
||
case cpuinfo_arm_chipset_series_broadcom_bcm:
|
||
switch (chipset->model) {
|
||
case 2837: /* BCM2837 */
|
||
l2_size = 512 * 1024;
|
||
break;
|
||
}
|
||
break;
|
||
case cpuinfo_arm_chipset_series_samsung_exynos:
|
||
l1_size = 32 * 1024;
|
||
break;
|
||
default:
|
||
/* Silence compiler warning about unhandled enum values */
|
||
break;
|
||
}
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = l1_size,
|
||
.associativity = 2,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = l1_size,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = l2_size,
|
||
.associativity = 16,
|
||
.line_size = 64
|
||
};
|
||
}
|
||
break;
|
||
case cpuinfo_uarch_cortex_a55r0:
|
||
case cpuinfo_uarch_cortex_a55:
|
||
/*
|
||
* ARM Cortex-A55 Core Technical Reference Manual
|
||
* A6.1. About the L1 memory system
|
||
* The Cortex®-A55 core's L1 memory system enhances core performance and power efficiency.
|
||
* It consists of separate instruction and data caches. You can configure instruction and data caches
|
||
* independently during implementation to sizes of 16KB, 32KB, or 64KB.
|
||
*
|
||
* L1 instruction-side memory system
|
||
* The L1 instruction-side memory system provides an instruction stream to the DPU. Its key features are:
|
||
* - 64-byte instruction side cache line length.
|
||
* - 4-way set associative L1 instruction cache.
|
||
*
|
||
* L1 data-side memory system
|
||
* - 64-byte data side cache line length.
|
||
* - 4-way set associative L1 data cache.
|
||
*
|
||
* A7.1 About the L2 memory system
|
||
* The Cortex-A55 L2 memory system is required to interface the Cortex-A55 cores to the L3 memory system.
|
||
* The L2 memory subsystem consists of:
|
||
* - An optional 4-way, set-associative L2 cache with a configurable size of 64KB, 128KB or 256KB. Cache
|
||
* lines have a fixed length of 64 bytes.
|
||
*
|
||
* The main features of the L2 memory system are:
|
||
* - Strictly exclusive with L1 data cache.
|
||
* - Pseudo-inclusive with L1 instruction cache.
|
||
* - Private per-core unified L2 cache.
|
||
*
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
* | Snapdragon 845 | 4(+4) | 32K | 32K | 128K | 2M | [1], sysfs |
|
||
* | Exynos 9810 | 4(+4) | ? | ? | None | 512K | [2] |
|
||
* | Kirin 980 | 4(+4) | 32K | 32K | 128K | 4M | [3] |
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
*
|
||
* [1] https://www.anandtech.com/show/12114/qualcomm-announces-snapdragon-845-soc
|
||
* [2] https://www.anandtech.com/show/12478/exynos-9810-handson-awkward-first-results
|
||
* [3] https://en.wikichip.org/wiki/hisilicon/kirin/980
|
||
*/
|
||
if (midr_is_qualcomm_cortex_a55_silver(midr)) {
|
||
/* Qualcomm-modified Cortex-A55 in Snapdragon 670 / 710 / 845 */
|
||
uint32_t l3_size = 1024 * 1024;
|
||
switch (chipset->series) {
|
||
case cpuinfo_arm_chipset_series_qualcomm_snapdragon:
|
||
/* Snapdragon 845: 2M L3 cache */
|
||
if (chipset->model == 845) {
|
||
l3_size = 2 * 1024 * 1024;
|
||
}
|
||
break;
|
||
default:
|
||
break;
|
||
}
|
||
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 128 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l3 = (struct cpuinfo_cache) {
|
||
.size = l3_size,
|
||
.associativity = 16,
|
||
.line_size = 64,
|
||
};
|
||
} else {
|
||
/* Standard Cortex-A55 */
|
||
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos) {
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 512 * 1024,
|
||
/* DynamIQ */
|
||
.associativity = 16,
|
||
.line_size = 64,
|
||
};
|
||
} else {
|
||
uint32_t l3_size = 1024 * 1024;
|
||
switch (chipset->series) {
|
||
case cpuinfo_arm_chipset_series_hisilicon_kirin:
|
||
/* Kirin 980: 4M L3 cache */
|
||
if (chipset->model == 980) {
|
||
l3_size = 4 * 1024 * 1024;
|
||
}
|
||
break;
|
||
default:
|
||
break;
|
||
}
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 128 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l3 = (struct cpuinfo_cache) {
|
||
.size = l3_size,
|
||
/* DynamIQ */
|
||
.associativity = 16,
|
||
.line_size = 64,
|
||
};
|
||
}
|
||
}
|
||
break;
|
||
case cpuinfo_uarch_cortex_a57:
|
||
/*
|
||
* ARM Cortex-A57 MPCore Processor Technical Reference Manual:
|
||
* 6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches.
|
||
*
|
||
* The L1 instruction memory system has the following features:
|
||
* - 48KB 3-way set-associative instruction cache.
|
||
* - Fixed line length of 64 bytes.
|
||
*
|
||
* The L1 data memory system has the following features:
|
||
* - 32KB 2-way set-associative data cache.
|
||
* - Fixed line length of 64 bytes.
|
||
*
|
||
* 7.1 About the L2 memory system
|
||
* The features of the L2 memory system include:
|
||
* - Configurable L2 cache size of 512KB, 1MB, and 2MB.
|
||
* - Fixed line length of 64 bytes.
|
||
* - 16-way set-associative cache structure.
|
||
* - Inclusion property with L1 data caches.
|
||
*
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Snapdragon 810 | 4(+4) | 32K | 48K | 2M | [1] |
|
||
* | Exynos 7420 | 4(+4) | 32K | 48K | 2M | [2] |
|
||
* | Jetson TX1 | 4 | 32K | 48K | 2M | [3] |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] http://www.anandtech.com/show/9837/snapdragon-820-preview
|
||
* [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2
|
||
* [3] https://devblogs.nvidia.com/parallelforall/jetson-tx2-delivers-twice-intelligence-edge/
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 48 * 1024,
|
||
.associativity = 3,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 2,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = cluster_cores * 512 * 1024,
|
||
.associativity = 16,
|
||
.line_size = 64,
|
||
.flags = CPUINFO_CACHE_INCLUSIVE
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_cortex_a65:
|
||
{
|
||
/*
|
||
* ARM Cortex‑A65 Core Technical Reference Manual
|
||
* A6.1. About the L1 memory system
|
||
* The L1 memory system enhances the performance and power efficiency in the Cortex‑A65 core.
|
||
* It consists of separate instruction and data caches. You can configure instruction and data caches
|
||
* independently during implementation to sizes of 32KB or 64KB.
|
||
*
|
||
* L1 instruction-side memory system
|
||
* The L1 instruction-side memory system provides an instruction stream to the DPU. Its key features are:
|
||
* - 64-byte instruction side cache line length.
|
||
* - 4-way set associative L1 instruction cache.
|
||
*
|
||
* L1 data-side memory system
|
||
* - 64-byte data side cache line length.
|
||
* - 4-way set associative L1 data cache.
|
||
*
|
||
* A7.1 About the L2 memory system
|
||
* The Cortex‑A65 L2 memory system is required to interface the Cortex‑A65 cores to the L3 memory system.
|
||
* The L2 memory subsystem consists of:
|
||
* - An optional 4-way, set-associative L2 cache with a configurable size of 64KB, 128KB, or 256KB.
|
||
* Cache lines have a fixed length of 64 bytes.
|
||
*
|
||
* The main features of the L2 memory system are:
|
||
* - Strictly exclusive with L1 data cache.
|
||
* - Pseudo-inclusive with L1 instruction cache.
|
||
* - Private per-core unified L2 cache.
|
||
*/
|
||
const uint32_t l1_size = 32 * 1024;
|
||
const uint32_t l2_size = 128 * 1024;
|
||
const uint32_t l3_size = 512 * 1024;
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = l1_size,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = l1_size,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = l2_size,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
.flags = CPUINFO_CACHE_INCLUSIVE
|
||
};
|
||
*l3 = (struct cpuinfo_cache) {
|
||
.size = l3_size,
|
||
/* DynamIQ */
|
||
.associativity = 16,
|
||
.line_size = 64,
|
||
};
|
||
break;
|
||
}
|
||
case cpuinfo_uarch_cortex_a72:
|
||
{
|
||
/*
|
||
* ARM Cortex-A72 MPCore Processor Technical Reference Manual
|
||
* 6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches.
|
||
*
|
||
* The L1 instruction memory system has the following features:
|
||
* - 48KB 3-way set-associative instruction cache.
|
||
* - Fixed line length of 64 bytes.
|
||
*
|
||
* The L1 data memory system has the following features:
|
||
* - 32KB 2-way set-associative data cache.
|
||
* - Fixed cache line length of 64 bytes.
|
||
*
|
||
* 7.1 About the L2 memory system
|
||
* The features of the L2 memory system include:
|
||
* - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
|
||
* - Fixed line length of 64 bytes.
|
||
* - Banked pipeline structures.
|
||
* - Inclusion property with L1 data caches.
|
||
* - 16-way set-associative cache structure.
|
||
*
|
||
* +---------------------+---------+-----------+-----------+------------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +---------------------+---------+-----------+-----------+------------+-----------+
|
||
* | Snapdragon 650 | 2(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [1] |
|
||
* | Snapdragon 652 | 4(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [2] |
|
||
* | Snapdragon 653 | 4(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [3] |
|
||
* | HiSilicon Kirin 950 | 4(+4) | 32K+32K | 48K+32K | ? | |
|
||
* | HiSilicon Kirin 955 | 4(+4) | 32K+32K | 48K+32K | ? | |
|
||
* | MediaTek MT8173C | 2(+2) | 32K(+32K) | 48K(+32K) | 1M(+512K) | sysfs |
|
||
* | MediaTek Helio X20 | 2(+4+4) | ? | ? | ? | |
|
||
* | MediaTek Helio X23 | 2(+4+4) | ? | ? | ? | |
|
||
* | MediaTek Helio X25 | 2(+4+4) | ? | ? | ? | |
|
||
* | MediaTek Helio X27 | 2(+4+4) | ? | ? | ? | |
|
||
* | Broadcom BCM2711 | 4 | 32K | 48K | 1M | [4] |
|
||
* +---------------------+---------+-----------+-----------+------------+-----------+
|
||
*
|
||
* [1] http://pdadb.net/index.php?m=processor&id=578&c=qualcomm_snapdragon_618_msm8956__snapdragon_650
|
||
* [2] http://pdadb.net/index.php?m=processor&id=667&c=qualcomm_snapdragon_620_apq8076__snapdragon_652
|
||
* [3] http://pdadb.net/index.php?m=processor&id=692&c=qualcomm_snapdragon_653_msm8976sg__msm8976_pro
|
||
* [4] https://www.raspberrypi.org/documentation/hardware/raspberrypi/bcm2711/README.md
|
||
*/
|
||
uint32_t l2_size;
|
||
switch (chipset->series) {
|
||
case cpuinfo_arm_chipset_series_hisilicon_kirin:
|
||
l2_size = 2 * 1024 * 1024;
|
||
break;
|
||
default:
|
||
l2_size = 1024 * 1024;
|
||
break;
|
||
}
|
||
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 48 * 1024,
|
||
.associativity = 3,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 2,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = l2_size,
|
||
.associativity = 16,
|
||
.line_size = 64,
|
||
.flags = CPUINFO_CACHE_INCLUSIVE
|
||
};
|
||
break;
|
||
}
|
||
case cpuinfo_uarch_cortex_a73:
|
||
{
|
||
/*
|
||
* ARM Cortex‑A73 MPCore Processor Technical Reference Manual
|
||
* 6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches.
|
||
* The size of the instruction cache is 64KB.
|
||
* The size of the data cache is configurable to either 32KB or 64KB.
|
||
*
|
||
* The L1 instruction memory system has the following key features:
|
||
* - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache.
|
||
* - Fixed cache line length of 64 bytes.
|
||
*
|
||
* The L1 data memory system has the following features:
|
||
* - ...the data cache behaves like an eight-way set associative PIPT cache (for 32KB configurations)
|
||
* and a 16-way set associative PIPT cache (for 64KB configurations).
|
||
* - Fixed cache line length of 64 bytes.
|
||
*
|
||
* 7.1 About the L2 memory system
|
||
* The L2 memory system consists of:
|
||
* - A tightly-integrated L2 cache with:
|
||
* - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
|
||
* - A 16-way, set-associative structure.
|
||
* - A fixed line length of 64 bytes.
|
||
*
|
||
* The ARM Cortex A73 - Artemis Unveiled [1]
|
||
* "ARM still envisions that most vendors will choose to use configurations of 1 to
|
||
* 2MB in consumer products. The L2 cache is inclusive of the L1 cache. "
|
||
*
|
||
* +---------------------+---------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +---------------------+---------+-----------+-----------+-----------+-----------+
|
||
* | HiSilicon Kirin 960 | 4(+4) | 64K+32K | 64K+32K | ? | [2] |
|
||
* | MediaTek Helio X30 | 2(+4+4) | ? | 64K+ ? | ? | |
|
||
* | Snapdragon 636 | 4(+4) | 64K(+32K) | 64K(+32K) | 1M(+1M) | sysfs |
|
||
* | Snapdragon 660 | 4(+4) | 64K+32K | 64K+32K | 1M(+1M) | [3] |
|
||
* | Snapdragon 835 | 4(+4) | 64K+32K | 64K+32K | 2M(+1M) | sysfs |
|
||
* +---------------------+---------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] http://www.anandtech.com/show/10347/arm-cortex-a73-artemis-unveiled/2
|
||
* [2] http://www.anandtech.com/show/11088/hisilicon-kirin-960-performance-and-power/3
|
||
* [3] https://arstechnica.com/gadgets/2017/05/qualcomms-snapdragon-660-and-630-bring-more-high-end-features-to-midrange-chips/
|
||
*/
|
||
uint32_t l1d_size = 32 * 1024;
|
||
uint32_t l2_size = 512 * 1024;
|
||
switch (chipset->series) {
|
||
case cpuinfo_arm_chipset_series_hisilicon_kirin:
|
||
l1d_size = 64 * 1024;
|
||
l2_size = 2 * 1024 * 1024;
|
||
break;
|
||
case cpuinfo_arm_chipset_series_mediatek_mt:
|
||
l1d_size = 64 * 1024;
|
||
l2_size = 1 * 1024 * 1024; /* TODO: verify assumption */
|
||
break;
|
||
default:
|
||
switch (midr) {
|
||
case UINT32_C(0x51AF8001): /* Kryo 280 Gold */
|
||
l1d_size = 64 * 1024;
|
||
l2_size = 2 * 1024 * 1024;
|
||
break;
|
||
case UINT32_C(0x51AF8002): /* Kryo 260 Gold */
|
||
l1d_size = 64 * 1024;
|
||
l2_size = 1 * 1024 * 1024;
|
||
break;
|
||
}
|
||
}
|
||
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = l1d_size,
|
||
.associativity = (l1d_size >> 12),
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = l2_size,
|
||
.associativity = 16,
|
||
.line_size = 64,
|
||
.flags = CPUINFO_CACHE_INCLUSIVE
|
||
};
|
||
break;
|
||
}
|
||
case cpuinfo_uarch_cortex_a75:
|
||
{
|
||
/*
|
||
* ARM Cortex-A75 Core Technical Reference Manual
|
||
* A6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
|
||
*
|
||
* A6.1.1 L1 instruction-side memory system
|
||
* The L1 instruction memory system has the following key features:
|
||
* - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache.
|
||
* - Fixed cache line length of 64 bytes.
|
||
*
|
||
* A6.1.2 L1 data-side memory system
|
||
* The L1 data memory system has the following features:
|
||
* - Physically Indexed, Physically Tagged (PIPT), 16-way set-associative L1 data cache.
|
||
* - Fixed cache line length of 64 bytes.
|
||
* - Pseudo-random cache replacement policy.
|
||
*
|
||
* A7.1 About the L2 memory system
|
||
* The L2 memory subsystem consist of:
|
||
* - An 8-way set associative L2 cache with a configurable size of 256KB or 512KB.
|
||
* Cache lines have a fixed length of 64 bytes.
|
||
*
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
* | Snapdragon 845 | 4(+4) | 64K | 64K | 256K | 2M | [1], sysfs |
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
*
|
||
* [1] https://www.anandtech.com/show/12114/qualcomm-announces-snapdragon-845-soc
|
||
*/
|
||
uint32_t l3_size = 1024 * 1024;
|
||
switch (chipset->series) {
|
||
case cpuinfo_arm_chipset_series_qualcomm_snapdragon:
|
||
/* Snapdragon 845: 2M L3 cache */
|
||
if (chipset->model == 845) {
|
||
l3_size = 2 * 1024 * 1024;
|
||
}
|
||
break;
|
||
default:
|
||
break;
|
||
}
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 16,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 256 * 1024,
|
||
.associativity = 8,
|
||
.line_size = 64
|
||
};
|
||
*l3 = (struct cpuinfo_cache) {
|
||
.size = l3_size,
|
||
.associativity = 16,
|
||
.line_size = 64
|
||
};
|
||
break;
|
||
}
|
||
case cpuinfo_uarch_cortex_a76:
|
||
{
|
||
/*
|
||
* ARM Cortex-A76 Core Technical Reference Manual
|
||
* A6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
|
||
*
|
||
* A6.1.1 L1 instruction-side memory system
|
||
* The L1 instruction memory system has the following key features:
|
||
* - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
|
||
* Physically Tagged (PIPT) 4-way set-associative L1 data cache.
|
||
* - Fixed cache line length of 64 bytes.
|
||
*
|
||
* A6.1.2 L1 data-side memory system
|
||
* The L1 data memory system has the following features:
|
||
* - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
|
||
* Physically Tagged (PIPT) 4-way set-associative L1 data cache.
|
||
* - Fixed cache line length of 64 bytes.
|
||
* - Pseudo-LRU cache replacement policy.
|
||
*
|
||
* A7.1 About the L2 memory system
|
||
* The L2 memory subsystem consist of:
|
||
* - An 8-way set associative L2 cache with a configurable size of 128KB, 256KB or 512KB.
|
||
* Cache lines have a fixed length of 64 bytes.
|
||
* - Strictly inclusive with L1 data cache. Weakly inclusive with L1 instruction cache.
|
||
* - Dynamic biased replacement policy.
|
||
* - Modified Exclusive Shared Invalid (MESI) coherency.
|
||
*
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
* | Kirin 980 | 4(+4) | 64K | 64K | 512K | 4M | [1], [2] |
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
*
|
||
* [1] https://www.anandtech.com/show/13298/hisilicon-announces-the-kirin-980-first-a76-g76-on-7nm
|
||
* [2] https://en.wikichip.org/wiki/hisilicon/kirin/980
|
||
*/
|
||
uint32_t l2_size = 256 * 1024;
|
||
uint32_t l3_size = 1024 * 1024;
|
||
switch (chipset->series) {
|
||
case cpuinfo_arm_chipset_series_hisilicon_kirin:
|
||
/* Kirin 980: 512K L2 cache + 4M L3 cache */
|
||
if (chipset->model == 980) {
|
||
l2_size = 512 * 1024;
|
||
l3_size = 4 * 1024 * 1024;
|
||
}
|
||
break;
|
||
default:
|
||
break;
|
||
}
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = l2_size,
|
||
.associativity = 8,
|
||
.line_size = 64,
|
||
.flags = CPUINFO_CACHE_INCLUSIVE,
|
||
};
|
||
*l3 = (struct cpuinfo_cache) {
|
||
.size = l3_size,
|
||
.associativity = 16,
|
||
.line_size = 64,
|
||
};
|
||
break;
|
||
}
|
||
case cpuinfo_uarch_cortex_a77:
|
||
{
|
||
/*
|
||
* ARM Cortex-A77 Core Technical Reference Manual
|
||
* A6.1. About the L1 memory system
|
||
* The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
|
||
*
|
||
* A6.1.1 L1 instruction-side memory system
|
||
* The L1 instruction memory system has the following key features:
|
||
* - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
|
||
* Physically Tagged (PIPT) 4-way set-associative L1 data cache.
|
||
* - Fixed cache line length of 64 bytes.
|
||
*
|
||
* A6.1.2 L1 data-side memory system
|
||
* The L1 data memory system has the following features:
|
||
* - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
|
||
* Physically Tagged (PIPT) 4-way set-associative L1 data cache.
|
||
* - Fixed cache line length of 64 bytes.
|
||
* - Pseudo-LRU cache replacement policy.
|
||
*
|
||
* A7.1 About the L2 memory system
|
||
* The L2 memory subsystem consist of:
|
||
* - An 8-way set associative L2 cache with a configurable size of 128KB, 256KB or 512KB. Cache lines
|
||
* have a fixed length of 64 bytes.
|
||
* - Strictly inclusive with L1 data cache. Weakly inclusive with L1 instruction cache.
|
||
*/
|
||
const uint32_t l2_size = 256 * 1024;
|
||
const uint32_t l3_size = 1024 * 1024;
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64,
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = l2_size,
|
||
.associativity = 8,
|
||
.line_size = 64,
|
||
.flags = CPUINFO_CACHE_INCLUSIVE,
|
||
};
|
||
*l3 = (struct cpuinfo_cache) {
|
||
.size = l3_size,
|
||
.associativity = 16,
|
||
.line_size = 64,
|
||
};
|
||
break;
|
||
}
|
||
#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__)
|
||
case cpuinfo_uarch_scorpion:
|
||
/*
|
||
* - "The CPU includes 32KB instruction and data caches as
|
||
* well as a complete memory-management unit (MMU) suitable
|
||
* for high-level operating systems. The CPU also has
|
||
* 256KB of SRAM that can be allocated in 64KB increments
|
||
* to level-two (L2) cache or tightly coupled memory (TCM)." [1]
|
||
* We interpret it as L2 cache being 4-way set-associative on single-core Scorpion.
|
||
* - L1 Data Cache = 32 KB. 32 B/line. [2]
|
||
* - L2 Cache = 256 KB. 128 B/line. [2]
|
||
* - 256 KB (single-core) or 512 KB (dual-core) L2 cache [3]
|
||
* - Single or dual-core configuration [3]
|
||
* - For L1 cache assume the same associativity as Krait
|
||
*
|
||
* [1] https://www.qualcomm.com/media/documents/files/linley-report-on-dual-core-snapdragon.pdf
|
||
* [2] http://www.7-cpu.com/cpu/Snapdragon.html
|
||
* [3] https://en.wikipedia.org/wiki/Scorpion_(CPU)
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = cluster_cores * 256 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 128
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_krait:
|
||
/*
|
||
* - L0 Data cache = 4 KB. 64 B/line, direct mapped [1]
|
||
* - L0 Instruction cache = 4 KB. [1]
|
||
* - L1 Data cache = 16 KB. 64 B/line, 4-way [1]
|
||
* - L1 Instruction cache = 16 KB, 4-way [1]
|
||
* - L2 Cache = 1 MB, 128 B/line, 8-way. Each core has fast access only to 512 KB of L2 cache. [1]
|
||
* - L2 = 1MB (dual core) or 2MB (quad core), 8-way set associative [2]
|
||
*
|
||
* [1] http://www.7-cpu.com/cpu/Krait.html
|
||
* [2] http://www.anandtech.com/show/4940/qualcomm-new-snapdragon-s4-msm8960-krait-architecture/2
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64 /* assume same as L1D */
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = cluster_cores * 512 * 1024,
|
||
.associativity = 8,
|
||
.line_size = 128
|
||
};
|
||
break;
|
||
#endif /* CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) */
|
||
case cpuinfo_uarch_kryo:
|
||
/*
|
||
* +-----------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +-----------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Snapdragon 820 | 2+2 | 24K | 32K | 1M+512K | [1, 2] |
|
||
* | Snapdragon 821 | 2+2 | ? | ? | 1M+512K | [1] |
|
||
* +-----------------+-------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] http://www.anandtech.com/show/9837/snapdragon-820-preview/2
|
||
* [2] https://www.inforcecomputing.com/public_docs/Inforce6601/Inforce_6601_Micro-SOM_FAQs_04-2016-1.pdf
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 24 * 1024,
|
||
.associativity = 3,
|
||
.line_size = 64
|
||
};
|
||
if (midr_is_kryo_silver(midr)) {
|
||
/* Kryo "Silver" */
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 512 * 1024,
|
||
.associativity = 8,
|
||
.line_size = 128
|
||
};
|
||
} else {
|
||
/* Kryo "Gold" */
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 1024 * 1024,
|
||
.associativity = 8,
|
||
.line_size = 128
|
||
};
|
||
}
|
||
break;
|
||
case cpuinfo_uarch_denver:
|
||
case cpuinfo_uarch_denver2:
|
||
/*
|
||
* The Denver chip includes a 128KB, 4-way level 1 instruction cache, a 64KB, 4-way level 2 data cache,
|
||
* and a 2MB, 16-way level 2 cache, all of which can service both cores. [1]
|
||
*
|
||
* All the caches have 64-byte lines. [2]
|
||
*
|
||
* [1] http://www.pcworld.com/article/2463900/nvidia-reveals-pc-like-performance-for-denver-tegra-k1.html
|
||
* [2] http://linleygroup.com/newsletters/newsletter_detail.php?num=5205&year=2014
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 128 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 2 * 1024 * 1024,
|
||
.associativity = 16,
|
||
.line_size = 64
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_exynos_m1:
|
||
case cpuinfo_uarch_exynos_m2:
|
||
/*
|
||
* - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$,
|
||
* namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1]
|
||
* - "For loads and stores, a 32 KB, 8-way set associative cache with 64 byte line size is used" [1]
|
||
* - "The L2 cache here is 2MB shared across all cores split into 16 sets. This memory is also split
|
||
* into 4 banks and has a 22 cycle latency" [1]
|
||
*
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
* | Exynos 8 Octa 8890 | 4(+4) | 64K | 32K | 2M | [1] |
|
||
* | Exynos 8 Octa 8895 | 4(+4) | 64K | 32K | 2M | [2] |
|
||
* +--------------------+-------+-----------+-----------+-----------+-----------+
|
||
*
|
||
* [1] http://www.anandtech.com/show/10590/hot-chips-2016-exynos-m1-architecture-disclosed
|
||
* [2] https://www.extremetech.com/mobile/244949-samsungs-exynos-8895-features-custom-cpu-cores-first-10nm-chip-market
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 128
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 8,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 2 * 1024 * 1024,
|
||
.associativity = 16,
|
||
.line_size = 64
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_exynos_m3:
|
||
/*
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
* | Exynos 9810 | 4(+4) | 64K | ? | 512K | 4M | [1] |
|
||
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
|
||
*
|
||
* [1] https://www.anandtech.com/show/12478/exynos-9810-handson-awkward-first-results
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024 /* assume same as in Exynos M1/M2 cores */,
|
||
.associativity = 4 /* assume same as in Exynos M1/M2 cores */,
|
||
.line_size = 128 /* assume same as in Exynos M1/M2 cores */
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 8 /* assume same as in Exynos M1/M2 cores */,
|
||
.line_size = 64 /* assume same as in Exynos M1/M2 cores */,
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 512 * 1024,
|
||
.associativity = 16 /* assume same as in Exynos M1/M2 cores */,
|
||
.line_size = 64 /* assume same as in Exynos M1/M2 cores */,
|
||
};
|
||
*l3 = (struct cpuinfo_cache) {
|
||
.size = 4 * 1024 * 1024,
|
||
.associativity = 16 /* assume DynamIQ cache */,
|
||
.line_size = 64 /* assume DynamIQ cache */,
|
||
};
|
||
break;
|
||
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
|
||
case cpuinfo_uarch_thunderx:
|
||
/*
|
||
* "78K-Icache and 32K-D cache per core, 16 MB shared L2 cache" [1]
|
||
*
|
||
* [1] https://www.cavium.com/pdfFiles/ThunderX_CP_PB_Rev1.pdf
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 78 * 1024,
|
||
.associativity = 4 /* assumption */,
|
||
.line_size = 64 /* assumption */
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4 /* assumption */,
|
||
.line_size = 64 /* assumption */
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024 * 1024,
|
||
.associativity = 8 /* assumption */,
|
||
.line_size = 64 /* assumption */
|
||
};
|
||
break;
|
||
case cpuinfo_uarch_taishan_v110:
|
||
/*
|
||
* It features private 64 KiB L1 instruction and data caches as well as 512 KiB of private L2. [1]
|
||
*
|
||
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
|
||
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
|
||
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
|
||
* | Kunpeng 920-3226 | 32 | 64K | 64K | 512K | 32M | [2] |
|
||
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
|
||
* | Kunpeng 920-4826 | 48 | 64K | 64K | 512K | 48M | [3] |
|
||
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
|
||
* | Kunpeng 920-6426 | 64 | 64K | 64K | 512K | 64M | [4] |
|
||
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
|
||
*
|
||
* [1] https://en.wikichip.org/wiki/hisilicon/microarchitectures/taishan_v110
|
||
* [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226
|
||
* [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826
|
||
* [4] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
|
||
*/
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4 /* assumption */,
|
||
.line_size = 128 /* assumption */,
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 64 * 1024,
|
||
.associativity = 4 /* assumption */,
|
||
.line_size = 128 /* assumption */,
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = 512 * 1024,
|
||
.associativity = 8 /* assumption */,
|
||
.line_size = 128 /* assumption */,
|
||
.flags = CPUINFO_CACHE_INCLUSIVE /* assumption */,
|
||
};
|
||
*l3 = (struct cpuinfo_cache) {
|
||
.size = cluster_cores * 1024 * 1024,
|
||
.associativity = 16 /* assumption */,
|
||
.line_size = 128 /* assumption */,
|
||
};
|
||
break;
|
||
#endif
|
||
case cpuinfo_uarch_cortex_a12:
|
||
case cpuinfo_uarch_cortex_a32:
|
||
default:
|
||
cpuinfo_log_warning("target uarch not recognized; using generic cache parameters");
|
||
/* Follow OpenBLAS */
|
||
if (arch_version >= 8) {
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 32 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 64
|
||
};
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = cluster_cores * 256 * 1024,
|
||
.associativity = 8,
|
||
.line_size = 64
|
||
};
|
||
} else {
|
||
*l1i = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
*l1d = (struct cpuinfo_cache) {
|
||
.size = 16 * 1024,
|
||
.associativity = 4,
|
||
.line_size = 32
|
||
};
|
||
if (arch_version >= 7) {
|
||
*l2 = (struct cpuinfo_cache) {
|
||
.size = cluster_cores * 128 * 1024,
|
||
.associativity = 8,
|
||
.line_size = 32
|
||
};
|
||
}
|
||
}
|
||
break;
|
||
}
|
||
l1i->sets = l1i->size / (l1i->associativity * l1i->line_size);
|
||
l1i->partitions = 1;
|
||
l1d->sets = l1d->size / (l1d->associativity * l1d->line_size);
|
||
l1d->partitions = 1;
|
||
if (l2->size != 0) {
|
||
l2->sets = l2->size / (l2->associativity * l2->line_size);
|
||
l2->partitions = 1;
|
||
if (l3->size != 0) {
|
||
l3->sets = l3->size / (l3->associativity * l3->line_size);
|
||
l3->partitions = 1;
|
||
}
|
||
}
|
||
}
|
||
|
||
uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* processor) {
|
||
/*
|
||
* There is no precise way to detect cache size on ARM/ARM64, and cache size reported by cpuinfo
|
||
* may underestimate the actual cache size. Thus, we use microarchitecture-specific maximum.
|
||
*/
|
||
switch (processor->core->uarch) {
|
||
case cpuinfo_uarch_xscale:
|
||
case cpuinfo_uarch_arm11:
|
||
case cpuinfo_uarch_scorpion:
|
||
case cpuinfo_uarch_krait:
|
||
case cpuinfo_uarch_kryo:
|
||
case cpuinfo_uarch_exynos_m1:
|
||
case cpuinfo_uarch_exynos_m2:
|
||
case cpuinfo_uarch_exynos_m3:
|
||
/* cpuinfo-detected cache size always correct */
|
||
return cpuinfo_compute_max_cache_size(processor);
|
||
case cpuinfo_uarch_cortex_a5:
|
||
/* Max observed (NXP Vybrid SoC) */
|
||
return 512 * 1024;
|
||
case cpuinfo_uarch_cortex_a7:
|
||
/*
|
||
* Cortex-A7 MPCore Technical Reference Manual:
|
||
* 7.1. About the L2 Memory system
|
||
* The L2 memory system consists of an:
|
||
* - Optional tightly-coupled L2 cache that includes:
|
||
* - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
|
||
*/
|
||
return 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a8:
|
||
/*
|
||
* Cortex-A8 Technical Reference Manual:
|
||
* 8.1. About the L2 memory system
|
||
* The key features of the L2 memory system include:
|
||
* - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
|
||
*/
|
||
return 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a9:
|
||
/* Max observed (e.g. Exynos 4212) */
|
||
return 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a12:
|
||
case cpuinfo_uarch_cortex_a17:
|
||
/*
|
||
* ARM Cortex-A17 MPCore Processor Technical Reference Manual:
|
||
* 7.1. About the L2 Memory system
|
||
* The key features of the L2 memory system include:
|
||
* - An integrated L2 cache:
|
||
* - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB.
|
||
*/
|
||
return 8 * 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a15:
|
||
/*
|
||
* ARM Cortex-A15 MPCore Processor Technical Reference Manual:
|
||
* 7.1. About the L2 memory system
|
||
* The features of the L2 memory system include:
|
||
* - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
|
||
*/
|
||
return 4 * 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a35:
|
||
/*
|
||
* ARM Cortex‑A35 Processor Technical Reference Manual:
|
||
* 7.1 About the L2 memory system
|
||
* L2 cache
|
||
* - Further features of the L2 cache are:
|
||
* - Configurable size of 128KB, 256KB, 512KB, and 1MB.
|
||
*/
|
||
return 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a53:
|
||
/*
|
||
* ARM Cortex-A53 MPCore Processor Technical Reference Manual:
|
||
* 7.1. About the L2 memory system
|
||
* The L2 memory system consists of an:
|
||
* - Optional tightly-coupled L2 cache that includes:
|
||
* - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
|
||
*/
|
||
return 2 * 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a57:
|
||
/*
|
||
* ARM Cortex-A57 MPCore Processor Technical Reference Manual:
|
||
* 7.1 About the L2 memory system
|
||
* The features of the L2 memory system include:
|
||
* - Configurable L2 cache size of 512KB, 1MB, and 2MB.
|
||
*/
|
||
return 2 * 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a72:
|
||
/*
|
||
* ARM Cortex-A72 MPCore Processor Technical Reference Manual:
|
||
* 7.1 About the L2 memory system
|
||
* The features of the L2 memory system include:
|
||
* - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
|
||
*/
|
||
return 4 * 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a73:
|
||
/*
|
||
* ARM Cortex‑A73 MPCore Processor Technical Reference Manual
|
||
* 7.1 About the L2 memory system
|
||
* The L2 memory system consists of:
|
||
* - A tightly-integrated L2 cache with:
|
||
* - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
|
||
*/
|
||
return 8 * 1024 * 1024;
|
||
case cpuinfo_uarch_cortex_a55:
|
||
case cpuinfo_uarch_cortex_a75:
|
||
case cpuinfo_uarch_cortex_a76:
|
||
case cpuinfo_uarch_exynos_m4:
|
||
default:
|
||
/*
|
||
* ARM DynamIQ Shared Unit Technical Reference Manual
|
||
* 1.3 Implementation options
|
||
* L3_CACHE_SIZE
|
||
* - 256KB
|
||
* - 512KB
|
||
* - 1024KB
|
||
* - 1536KB
|
||
* - 2048KB
|
||
* - 3072KB
|
||
* - 4096KB
|
||
*/
|
||
return 4 * 1024 * 1024;
|
||
}
|
||
}
|