03-feature
false sharing 英 [fɔːls ˈʃeərɪŋ] 伪共享;错误共享;假共享;多线程伪共享;虚假共享
barrier 英 [ˈbæriə(r)] 美 [ˈbæriər] n. 障碍;屏障;阻力;关卡;分界线;隔阂
fence 英 [fens] 美 [fens] n. 栅栏;篱笆;围栏; v. (用栅栏、篱笆或围栏)围住,隔开
biased 英 [ˈbaɪəst] 美 [ˈbaɪəst] adj. 有偏见的;偏向;倾向性的;片面的;偏重 v. 使有偏见;使偏心;使偏向
0. 并发编程三大特性
- 可见性(visibility)
- 有序性(ordering)
- 原子性(atomicity)
1. visibility
1. volatile
1. 保证线程可见性
- 每个线程都会对用到的变量有一个copy
- `volatile`使共享变量在多线程间可见
- 某些语句触发内存缓存同步刷新
/**
 * Visibility demo: a worker thread spins on a shared flag while the test thread
 * clears that flag one second later, with and without {@code volatile}.
 */
public class T01_volatile {
    // Non-volatile stop flag read by the workers in T1_trouble and T2_syncRefresh.
    // NOTE(review): it is static and never set back to true, so once one test has
    // cleared it, a later test in the same JVM sees false from the start.
    private static boolean running = true;

    /*
     * 1. Thread_A and Thread_B share one variable; per these notes Java keeps a copy
     *    in both A and B by default, so when B modifies the variable A does not see it.
     */
    @Test
    public void T1_trouble() {
        Thread t = new Thread(() -> {
            System.out.println("t, start");
            // Only the thread's own copy of running is read; the latest value is never observed.
            while (running) {
            }
            System.out.println("t, end!");
        });
        t.start();
        ThreadHelper.sleepSeconds(1);
        // Clear the flag from the test thread after the worker has been spinning for a second.
        running = false;
        ThreadHelper.join(t);
    }

    /**
     * 2. Certain statements trigger a refresh between memory and cache.
     */
    @Test
    public void T2_syncRefresh() {
        Thread t = new Thread(() -> {
            System.out.println("t, start");
            while (running) {
                // Triggers a memory/cache refresh. The println source (quoted below) takes a
                // synchronized lock, which costs performance.
                System.out.println("hello");
            }
            System.out.println("t, end!");
        });
        t.start();
        ThreadHelper.sleepSeconds(1);
        running = false;
        ThreadHelper.join(t);
    }

    /*
     Quoted source of println(String), kept for reference because of its synchronized block:

     public void println(String x) {
         synchronized (this) {
             print(x);
             newLine();
         }
     }
     */

    // 3. volatile: keeps the flag visible across threads.
    private static volatile boolean running_v = true;

    /**
     * 3.1. Forces every thread to read the flag from heap memory, making the shared
     *      variable visible between threads.
     * 3.2. Concurrent modification can still be inconsistent; volatile does not replace
     *      synchronized.
     */
    @Test
    public void T3_volatile() {
        Thread t = new Thread(() -> {
            System.out.println("t, start");
            // Same empty spin loop as T1_trouble, but on the volatile flag.
            while (running_v) {
            }
            System.out.println("t, end!");
        });
        t.start();
        ThreadHelper.sleepSeconds(1);
        running_v = false;
        ThreadHelper.join(t);
    }
}
2. 引用类型
`volatile`修饰引用类型(包括数组)时,只保证引用本身的可见性,不能保证其内部字段的可见性
/**
 * volatile on a reference type (arrays included) guarantees visibility of the reference
 * itself; it does not guarantee visibility of the fields inside the referenced object.
 */
public class T02_volatile_ref {
    // volatile here applies to the reference a only; it has no effect on A's own fields.
    volatile A a = new A();

    // Per the notes: running is visible here. The loop condition re-reads the volatile
    // field a on every iteration before reading a.running.
    @Test
    public void T1_obj() {
        Thread t = new Thread(() -> {
            System.out.println("start");
            while (a.running) {
            }
            System.out.println("end!");
        });
        t.start();
        ThreadHelper.sleepSeconds(1);
        a.running = false;
        ThreadHelper.join(t);
    }

    // Per the notes: running is NOT visible here. The loop lives inside A.run() and reads
    // A's non-volatile field directly.
    @Test
    public void T2_obj_method() {
        Thread t = new Thread(a::run);
        t.start();
        ThreadHelper.sleepSeconds(1);
        a.running = false;
        ThreadHelper.join(t);
    }

    // The reference b is not volatile, but B.running itself is.
    B b = new B();

    // Per the notes: running is visible, because the field being polled is declared volatile in B.
    @Test
    public void T3_volatile_running() {
        Thread t = new Thread(b::run);
        t.start();
        ThreadHelper.sleepSeconds(1);
        b.running = false;
        ThreadHelper.join(t);
    }
}
class A {
Boolean running = true;
void run() {
System.out.println("start");
while (running) {
}
System.out.println("end!");
}
}
class B {
volatile Boolean running = true;
void run() {
System.out.println("start");
while (running) {
}
System.out.println("end!");
}
}
3. 禁止指令重排序_DCL
/**
 * Lazy singleton using DCL (Double_Check_Lock).
 */
public class T04_Singleton {
    /*
     * Must be volatile. Without it the JIT may reorder the steps of
     * `INSTANCE = new T04_Singleton();`, so the reference is published while the object
     * is only half-initialized and another thread can obtain that half-initialized
     * instance. volatile forbids that reordering on INSTANCE.
     */
    private static volatile T04_Singleton INSTANCE;

    private T04_Singleton() {
    }

    /**
     * Returns the single instance, creating it on first use.
     *
     * @return the shared instance, never null
     */
    public static T04_Singleton getInstance() {
        // Copy the volatile field into a local so the common, already-initialized path
        // costs exactly one volatile read.
        T04_Singleton current = INSTANCE;
        if (current == null) {
            synchronized (T04_Singleton.class) {
                // Second check: another thread may have created the instance while this
                // one was waiting for the lock.
                current = INSTANCE;
                if (current == null) {
                    // Kept from the original demo; presumably widens the race window.
                    ThreadHelper.sleepMilli(1);
                    current = new T04_Singleton();
                    INSTANCE = current;
                }
            }
        }
        return current;
    }

    public void m() {
        System.out.println("m");
    }

    /** Starts 100 threads that each print the hash code of the instance they obtained. */
    public static void main(String[] args) {
        for (int i = 0; i < 100; i++) {
            new Thread(() -> System.out.println(T04_Singleton.getInstance().hashCode())).start();
        }
    }
}
4. 不能保证原子性
/**
 * volatile does not provide atomicity; synchronized does.
 */
public class T03_volatile_sync {
    volatile int count = 0;

    /**
     * Ten threads each add 10_000 to the volatile counter without any lock.
     */
    @Test
    public void T1_volatile() {
        // count++ is a non-atomic read-modify-write on a volatile field.
        Runnable unsafeAdder = () -> {
            int done = 0;
            while (done < 10_000) {
                count++;
                done++;
            }
        };
        Thread[] workers = new Thread[10];
        for (int slot = 0; slot < 10; slot++) {
            workers[slot] = new Thread(unsafeAdder);
        }
        ThreadHelper.start(workers);
        ThreadHelper.join(workers);
        System.out.println(count);
    }

    int num = 0;

    /**
     * Ten threads each add 10_000 to num while holding the monitor of this test instance
     * for their whole loop.
     */
    @Test
    public void T2_sync() {
        Runnable lockedAdder = () -> {
            synchronized (this) {
                int done = 0;
                while (done < 10_000) {
                    num++;
                    done++;
                }
            }
        };
        Thread[] workers = new Thread[10];
        for (int slot = 0; slot < 10; slot++) {
            workers[slot] = new Thread(lockedAdder);
        }
        ThreadHelper.start(workers);
        ThreadHelper.join(workers);
        System.out.println(num);
    }
}
2. 三级缓存
- L1、L2在cpu核中,L3在cpu中。逐级读,逐级存
- 可见性指的是L1、L2、L3各级缓存之间的可见性,并不是ThreadLocal
- 从CPU计算单元(ALU)到
Registers | < 1ns |
---|---|
L1 cache | 约1ns |
L2 cache | 约3ns |
L3 cache | 约15ns |
main memory | 约80ns |
3. cache_line(64B)
缓存行
- Block:64byte,折中值
- 缓存行越大,局部性空间效率越高,但读取时间慢
- 缓存行越小,局部性空间效率越低,但读取时间快
- 空间局部性原理:数据按块读取
- 时间局部性原理:指令按块读取
1. CacheLinePadding
/**
 * Cache-line padding (CacheLinePadding) timing demo: two threads each write their own
 * T.x 100 million times and the elapsed time is printed.
 */
public class T01_CacheLinePadding {
    public static long COUNT = 100_000_000L; // 100 million iterations per thread

    private static class T {
        // Uncomment to pad 7 longs (56 bytes) in front of x:
        // private long p1, p2, p3, p4, p5, p6, p7;
        public volatile long x = 0L; // 8B
        // Uncomment to pad 7 longs (56 bytes) behind x:
        // private long p9, p10, p11, p12, p13, p14, p15;
    }

    public static T[] arr = new T[2];

    static {
        arr[0] = new T();
        arr[1] = new T();
    }

    /*
     * 1. The single-field version (only x) takes longer than the three-declaration
     *    version (padding before and after x).
     *    A cache line is 64B. With only x, arr[0] and arr[1] very likely share a cache
     *    line; with the padding enabled they cannot share one.
     *    When one thread modifies its element, cache synchronization is triggered for
     *    the other thread's element, which increases the elapsed time.
     */
    public static void main(String[] args) throws Exception {
        // Writer for element 0.
        Thread t1 = new Thread(() -> {
            for (long i = 0; i < COUNT; i++) {
                arr[0].x = i;
            }
        });
        // Writer for element 1; never touches arr[0].
        Thread t2 = new Thread(() -> {
            for (long i = 0; i < COUNT; i++) {
                arr[1].x = i;
            }
        });
        long start = System.nanoTime();
        ThreadHelper.start(t1, t2);
        ThreadHelper.join(t1, t2);
        // Nanoseconds converted to milliseconds.
        System.out.println((System.nanoTime() - start) / 1_000_000 + "ms");
    }
}
2. Disruptor
- 写JDK1.7的大神,在`LinkedBlockingQueue`中运用了这个写法
- Disruptor:单机最快MQ。通过CAS和CacheLinePadding大大提高效率
- RingBuffer
public final class RingBuffer<E> extends RingBufferFields<E> implements Cursored, EventSequencer<E>, EventSink<E>
{
public static final long INITIAL_CURSOR_VALUE = Sequence.INITIAL_VALUE;
protected long p1, p2, p3, p4, p5, p6, p7;
abstract class RingBufferPad
{
protected long p1, p2, p3, p4, p5, p6, p7;
}
3. @Contended
缓存行对齐注解,只有JDK1.8起作用。保证注解的变量独立在一行中
# VM options
-XX:-RestrictContended
/**
 * Same two-writer timing demo as the cache-line padding example, using @Contended
 * instead of manual padding fields.
 * Note: this program must be run with the VM option -XX:-RestrictContended.
 */
public class T03_Contended {
    public static long COUNT = 100_000_000L;

    // Per the notes: only effective on JDK 1.8; keeps x on a cache line of its own.
    private static class T {
        // NOTE(review): unlike the manual-padding demo, x is not volatile here, so the
        // two timings may not be directly comparable; confirm this is intended.
        @Contended
        public long x = 0L;
    }

    public static T[] arr = new T[2];

    static {
        arr[0] = new T();
        arr[1] = new T();
    }

    public static void main(String[] args) throws Exception {
        // Writer for element 0.
        Thread t1 = new Thread(() -> {
            for (long i = 0; i < COUNT; i++) {
                arr[0].x = i;
            }
        });
        // Writer for element 1.
        Thread t2 = new Thread(() -> {
            for (long i = 0; i < COUNT; i++) {
                arr[1].x = i;
            }
        });
        final long start = System.nanoTime();
        ThreadHelper.start(t1, t2);
        ThreadHelper.join(t1, t2);
        // 100_0000 is the literal 1_000_000 with unusual digit grouping: the printed value
        // is elapsed milliseconds (no unit suffix).
        System.out.println((System.nanoTime() - start) / 100_0000);
    }
}
4. 缓存一致性MESI
- MESI只是实现缓存一致性协议的一种,是intel设计的。有些无法被缓存的数据,或者跨越多个缓存行的数据,依然必须使用总线锁/缓存锁
- cpu每个cache_line标记四种状态(额外两位)
Modified
Exclusive
Shared
Invalid
2. ordering
1. 乱序
1. 证明
/**
 * Evidence of instruction reordering.
 * 1. Per the notes this program is unrelated to visibility; a student reproduced the
 *    result on a single core as well.
 * 2. The result (0, 0) requires the statements in both t1 and t2 to be reordered.
 */
public class T01_Disorder {
    private static int x = 0, y = 0, a = 0, b = 0;

    public static void main(String[] args) throws InterruptedException {
        for (long i = 0; i < Long.MAX_VALUE; i++) {
            // Reset all four shared variables before each round.
            x = 0;
            y = 0;
            a = 0;
            b = 0;
            // Counted down once by each thread, so await() returns after both have finished.
            CountDownLatch latch = new CountDownLatch(2);
            Thread t1 = new Thread(() -> {
                a = 1;
                x = b;
                latch.countDown();
            });
            Thread t2 = new Thread(() -> {
                b = 1;
                y = a;
                latch.countDown();
            });
            t1.start();
            t2.start();
            latch.await();
            String result = "第" + i + "次 (" + x + ", " + y + ")";
            // In program order each thread writes 1 before it reads the other variable,
            // so at least one of x, y would be 1; stop at the first round where both are 0.
            if (x == 0 && y == 0) {
                System.err.println(result);
                break;
            }
        }
    }
}
2. 本质
前后两条语句没有依赖关系,有可能乱序。为了提高效率,减少不必要的等待
- 指令1,去内存读数据,速度慢
- 指令2,优先执行
3. 指令重排原则
as-if-serial(单线程最终一致性)
// No dependency between these two statements within a single thread: they may be reordered.
int a = 1;
int b = 1;
// The second statement depends on the first: these must not be reordered.
int c = 1;
c++;
2. 程序潜在问题
/**
 * Potential problems (ordering and visibility) when a flag and a value are published
 * without volatile.
 */
public class T02_PotentialProblem {
    // The volatile modifier is deliberately commented out for the demo.
    private static /*volatile*/ boolean ready = false;
    private static int number;

    private static class T extends Thread {
        @Override
        public void run() {
            while (!ready) {
                // Thread.yield() moves the current thread from running to ready and gives
                // up its CPU time; when the scheduler picks the next thread, this thread
                // may or may not be chosen again.
                Thread.yield();
            }
            System.out.println(number);
        }
    }

    public static void main(String[] args) throws Exception {
        Thread t = new T();
        t.start();
        number = 42; // 1. Ordering issue: this statement and the next may be reordered, so number = 0 could be printed.
        ready = true; // 2. Visibility issue: per the notes, MESI propagation and the refresh triggered by yield() may each make ready visible to the worker promptly.
        t.join();
    }
}
3. 对象半初始化状态
java:new Object();
成员变量有初始值,这是一个对象的半初始化状态
/** Minimal program whose bytecode is used to illustrate the steps of object creation. */
public class T03_T {
    public static void main(String[] args) {
        // A single allocation assigned to a local variable.
        Object o = new Object();
    }
}
0 new #2 <java/lang/Object> // 1. 申请内存,成员变量为初始值即默认值。半初始化状态
3 dup // 略过
4 invokespecial #1 <java/lang/Object.<init> : ()V> // 2. 调用构造方法,初始化成员变量。初始化状态
7 astore_1 // 3. 对象和变量建立关联
8 return
1. this逸出
/**
 * "this" escape.
 * If steps 2 and 3 below are swapped, the object reachable through this still holds the
 * default field values rather than the values assigned by initialization.
 */
public class T04_ThisEscape {
    int num = 8;

    /**
     * A constructor may create a thread, but should not start it; otherwise this may escape.
     */
    public T04_ThisEscape() {
        /*
         * Object creation steps:
         * 1. Allocate memory; fields hold their default values.
         * 2. Invoke <init>; fields receive their initial values (num = 8).
         * 3. Bind the reference to the object (this).
         */
        // The lambda captures this and is started before the constructor returns.
        new Thread(() ->
                System.out.println(this.num)
        ).start();
    }

    public static void main(String[] args) throws Exception {
        new T04_ThisEscape();
    }
}
// 以下两条指令有可能换序执行。导致this对象具备默认值,而不是初始值
4 invokespecial #1 <java/lang/Object.<init> : ()V>
7 astore_1
4. happens-before
- CPU级别,遵守as-if-serial(最终一致性),指令可以重排
- happens-before:前一个操作的结果对后续操作是可见的,是JVM层面的重排原则。对于hotspot来说,底层一条`lock`指令全搞定了
1. JLS17.4.5(略)
不要背,也没人问,知道有这么回事就行了
Java Language Specification(java语言规范)
- 程序次序规则:同一个线程内,按照代码出现的顺序,前面的代码先行于后面的代码,准确的说是控制流顺序,因为要考虑到分支和循环结构
- 管程锁定规则:一个unlock操作先行发生于后面(时间上)对同一个锁的lock操作
- volatile变量规则:对一个volatile变量的写操作先行发生于后面(时间上)对这个变量的读操作
- 线程启动规则:Thread的start( )方法先行发生于这个线程的每一个操作
- 线程终止规则:线程的所有操作都先行于此线程的终止检测。可以通过Thread.join( )方法结束、Thread.isAlive( )的返回值等手段检测线程的终止
- 线程中断规则:对线程interrupt( )方法的调用先行发生于被中断线程的代码检测到中断事件的发生,可以通过Thread.interrupted( )方法检测线程是否中断
- 对象终结规则:一个对象的初始化完成先行发生于它的finalize()方法的开始
- 传递性:如果操作A先行于操作B,操作B先行于操作C,那么操作A先行于操作C
5. CPU屏障
- 内存屏障:特殊指令,阻止乱序。前面的必须执行完,后面的才能执行
- intel_cpu:lfence、sfence、mfence(CPU特有指令)
6. JVM屏障
JVM是一种规范(相当于接口)。所有实现JVM规范的虚拟机,必须实现四个屏障。hotspot是其具体实现之一
- LoadLoad屏障:(Load1; LoadLoad; Load2)
- 在Load2及后续读取操作要读取的数据被访问前,保证Load1要读取的数据被读取完毕
- StoreStore屏障:(Store1; StoreStore; Store2)
- 在Store2及后续写入操作执行前,保证Store1的写入操作对其它处理器可见
- LoadStore屏障:(Load1; LoadStore; Store2)
- 在Store2及后续写入操作被刷出前,保证Load1要读取的数据被读取完毕
- StoreLoad屏障:(Store1; StoreLoad; Load2)
- 在Load2及后续所有读取操作执行前,保证Store1的写入对所有处理器可见
7. volatile底层
- 保持线程可见性
- 禁止指令重排。volatile变量的读写访问,都不可以换顺序
1. jvm层面
- volatile写。之前写完才能写,写完之后才能读
- StoreStoreBarrier => volatile写 => StoreLoadBarrier
- volatile读。读完才能读,读完才能写
- volatile读操作 => LoadLoadBarrier & LoadStoreBarrier
2. hotspot实现
- volatile关键字
- 编译成为.class文件。
ACC_VOLATILE
- hotspot实现
- 屏障两边指令禁止重排,保障有序!
bytecodeinterpreter.cpp
int field_offset = cache->f2_as_index();
if (cache->is_volatile()) {
if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
OrderAccess::fence();
}
orderaccess_linux_x86.inline.hpp
// Quoted from orderaccess_linux_x86.inline.hpp.
// When os::is_MP() is true, issues a `lock`-prefixed `addl $0` on the top-of-stack word
// (rsp on AMD64, esp otherwise); when it is false, nothing is executed.
inline void OrderAccess::fence() {
  if (os::is_MP()) {
    // always use locked addl since mfence is sometimes expensive
#ifdef AMD64
    __asm__ volatile ("lock; addl $0,0(%%rsp)" : : : "cc", "memory");
#else
    __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory");
#endif
  }
}
3. lock指令
用于在多处理器中执行指令时对共享内存的独占使用
- 能够将当前处理器对应缓存的内容刷新到内存,并使其他处理器对应的缓存失效
- 提供了有序的指令,无法越过这个内存屏障的作用
3. atomicity
java、c++…… 最终都是要搞成机器语言,机器语言翻译过来就是asm汇编语言。asm都可能被其他thread打断
/**
 * Atomicity demo: 100 threads each increment a shared counter 10_000 times, first with
 * no synchronization, then under synchronized, then under a ReentrantLock.
 */
public class T01_IPP {
    private static long n = 0L;

    /*
     * 1. 100 threads increment one variable 10_000 times each.
     *    One thread reads n into a register and increments it but has not written it back
     *    yet; another thread reads the same old value.
     *
     * race condition => multiple threads compete while accessing shared data
     * inconsistency  => an unexpected result produced by concurrent access
     */
    @Test
    public void v1() throws Exception {
        incrementConcurrently(() -> n++);
    }

    /*
     * 2. synchronized
     *    The statements inside synchronized(){} are executed as one uninterruptible unit.
     *
     * The essence of locking: serializing concurrent execution.
     */
    @Test
    public void v2() throws Exception {
        incrementConcurrently(() -> {
            // 1. Guarantees atomicity: concurrent increments are serialized.
            // 2. Guarantees visibility: main memory is synchronized when the block ends.
            // 3. Unrelated to ordering: ordering only requires single-thread consistency
            //    (as-if-serial) and has nothing to do with the lock.
            synchronized (T01_IPP.class) {
                n++;
            }
        });
    }

    /**
     * 3. ReentrantLock
     */
    @Test
    public void v3() throws Exception {
        Lock lock = new ReentrantLock();
        incrementConcurrently(() -> {
            lock.lock();
            try {
                n++;
            } finally {
                // Release in finally so a failure inside the critical section can never
                // leave the lock held and block the other threads.
                lock.unlock();
            }
        });
    }

    /**
     * Runs {@code incrementOnce} 10_000 times on each of 100 threads, waits for all of
     * them and prints the final value of n.
     *
     * @param incrementOnce a single increment of n, with whatever locking the caller wants
     * @throws InterruptedException if interrupted while waiting for the workers
     */
    private static void incrementConcurrently(Runnable incrementOnce) throws InterruptedException {
        // n is static, so without this reset each test would print a total that includes
        // the increments of whichever tests ran before it in the same JVM.
        n = 0L;
        Thread[] threads = new Thread[100];
        CountDownLatch latch = new CountDownLatch(threads.length);
        for (int i = 0; i < threads.length; i++) {
            threads[i] = new Thread(() -> {
                for (int j = 0; j < 10_000; j++) {
                    incrementOnce.run();
                }
                latch.countDown();
            });
        }
        ThreadHelper.start(threads);
        latch.await();
        System.out.println(n);
    }
}
1. JVM原子性(指令)
Java中的8大原子操作(JVM级别):(了解即可)
- `lock`:主内存,标识变量为线程独占
- `unlock`:主内存,解锁线程独占变量
- `read`:主内存,读取内存到线程缓存(工作内存)
- `load`:工作内存,read后的值放入线程本地变量副本
- `use`:工作内存,传值给执行引擎
- `assign`:工作内存,执行引擎结果赋值给线程本地变量
- `store`:工作内存,存值到主内存给write备用
- `write`:主内存,写变量值
2. i++底层
/** Minimal program used to show the bytecode generated for i++ on a static field. */
public class Test {
    static int i = 0;

    public static void main(String[] args) {
        // One source statement; the bytecode listing for it follows this class.
        i++;
    }
}
// i++的Bytecode,中间完全有可能被其他thread打断
// 这些汇编还会继续翻译成更复杂的汇编,更会被打断
0 getstatic #2 <com/listao/juc/p02_feature/atomicity/Test.i : I>
3 iconst_1
4 iadd
5 putstatic #2 <com/listao/juc/p02_feature/atomicity/Test.i : I>
8 return
3. 上锁
// The Java statement `i++` becomes 4 bytecode instructions, and possibly a dozen or more
// machine instructions; making `i++` atomic therefore requires a lock.
// The statements inside synchronized(){} are executed as one uninterruptible unit.
synchronized (ooxx) {
    i++;
}
1. 本质
- 上锁本质:并发编程序列化
- 并发线程争抢同一把锁才串行
/**
 * Shows what a lock is: threads only run one after another when they compete for the
 * same lock object.
 */
public class T02_WhatIsLock {
    // NOTE(review): o1..o3 are final but o is not; left unchanged so any external writer
    // still compiles. Consider making it final as well.
    static Object o = new Object();
    CountDownLatch latch = new CountDownLatch(3);

    /*
     * 1. No lock: the three threads run concurrently, total time about 2s.
     *    All three start and finish together.
     *    NOTE(review): the method name says "serial", but this is the concurrent case;
     *    the name is kept so existing test references do not break.
     */
    @Test
    public void V1_serial() throws InterruptedException {
        Runnable r = () -> {
            System.out.println(Thread.currentThread().getName() + " start!");
            ThreadHelper.sleepSeconds(2);
            System.out.println(Thread.currentThread().getName() + " end!");
            latch.countDown();
        };
        for (int i = 0; i < 3; i++) {
            new Thread(r).start();
        }
        latch.await();
    }

    /*
     * 2. The essence of locking: serializing concurrent execution.
     *    All three threads lock the same object o, so they run one after another,
     *    total time about 6s.
     *    NOTE(review): the method name says "parallel", but this is the serialized case.
     */
    @Test
    public void V2_parallel() throws InterruptedException {
        Runnable r = lockedTask(o);
        for (int i = 0; i < 3; i++) {
            new Thread(r).start();
        }
        latch.await();
    }

    static final Object o1 = new Object();
    static final Object o2 = new Object();
    static final Object o3 = new Object();

    /**
     * 3. Threads are only serialized when they compete for the same lock.
     *    Three locks on three different objects: the threads still run concurrently.
     */
    @Test
    public void V3_localOneOrg() throws InterruptedException {
        new Thread(lockedTask(o1)).start();
        new Thread(lockedTask(o2)).start();
        new Thread(lockedTask(o3)).start();
        latch.await();
    }

    /**
     * Builds the demo task: while holding {@code lock}, print start, sleep 2 seconds,
     * print end and count the latch down.
     *
     * @param lock the object whose monitor the task holds for its whole body
     * @return a task bound to this instance's latch
     */
    private Runnable lockedTask(Object lock) {
        return () -> {
            synchronized (lock) {
                System.out.println(Thread.currentThread().getName() + " start!");
                ThreadHelper.sleepSeconds(2);
                System.out.println(Thread.currentThread().getName() + " end!");
                latch.countDown();
            }
        };
    }
}
4. concept
- race_condition:竞争条件,指的是多个线程访问共享数据的时候产生竞争
- inconsistency(数据的不一致):并发访问下产生不期望出现的结果
- 保障数据一致呢?=> 线程同步(线程执行的顺序安排好)
- monitor(管程)=> 锁 => `synchronized (ooxx)`里的ooxx
- critical_section => 临界区 => `synchronized (ooxx){}`大括号里的内容
- 锁的粒度
- 粗:临界区执行时间长,语句多
- 细:临界区执行时间短,语句少
5. 悲观锁、乐观锁
- 悲观锁:悲观地认为操作一定会被别的线程打断。例如`synchronized`
- 乐观锁、自旋锁、无锁:乐观地认为操作不会被别的线程打断(CAS)
1. 效率
- CPU消耗
- 悲观锁:不占用cpu。等待的线程(waiting || blocked),在等待队列
- 乐观锁:占用cpu。等待的线程一直在自旋,循环 + cpu切换
- 不同的场景
- 悲观锁:临界区执行时间长(锁的粒度比较粗),等待线程多
- 乐观锁:临界区执行时间短(锁的粒度比较细),等待线程少
- 实际应用
- 悲观锁(synchronized)直接用,被进行了一系列优化。其内部既有自旋锁,又有偏向锁,重量级锁,自适应
2. CAS
CAS = Compare And Set/Swap/Exchange(自旋 / 自旋锁 / 无锁 / 乐观锁)
`cas(v, a, b)`:变量v,期待值a,修改值b
1 . ABA问题
- 变量被另一个线程,A => B => A 似乎是没变
- 你的女朋友在离开你的这段儿时间经历了别的人,自旋就是你空转等待,一直等到她接纳你为止
- 解决方案 => version(版本号AtomicStampedReference)
- 基础类型简单值不需要版本号
- A为引用类型时,属性发生变化时。加version
2. 底层原子性保障
- CAS进行两个操作(保障原子性,不能被打断)
- 比较compare
- 赋值set
3. Hotspot理解CAS
/**
 * Atomic classes from the java.util.concurrent.atomic package.
 */
public class T1_AtomicInteger {
    AtomicInteger count = new AtomicInteger(0);

    /** Adds 10_000 to the counter, one atomic increment at a time. */
    void m() {
        int remaining = 10_000;
        while (remaining > 0) {
            // Replaces a plain count++; optimistic (lock-free) update.
            count.incrementAndGet();
            remaining--;
        }
    }

    /**
     * A more efficient solution to the same counting problem, using an AtomicXXX class.
     * Each AtomicXXX method is atomic on its own; a sequence of several calls is not.
     */
    public static void main(String[] args) throws InterruptedException {
        T1_AtomicInteger counter = new T1_AtomicInteger();
        List<Thread> workers = new ArrayList<>();
        int id = 0;
        while (id < 100) {
            workers.add(new Thread(counter::m, "thread-" + id));
            id++;
        }
        for (Thread worker : workers) {
            worker.start();
        }
        ThreadHelper.join(workers);
        System.out.println(counter.count);
    }
}
结论:乐观锁底层,仍然有个悲观锁在保证
AtomicInteger
/**
 * Atomically increments by one the current value.
 *
 * @return the updated value
 */
public final int incrementAndGet() {
    // getAndAddInt yields the value it read before adding, so 1 is added again here to
    // return the updated value.
    return unsafe.getAndAddInt(this, valueOffset, 1) + 1;
}
Unsafe.class
// Retries the CAS until it succeeds.
// var1: target object, var2: field offset inside it, var4: delta to add.
// Returns the value that was read just before the successful CAS.
public final int getAndAddInt(Object var1, long var2, int var4) {
    int var5;
    do {
        // Re-read the current value on every attempt.
        var5 = this.getIntVolatile(var1, var2);
    } while(!this.compareAndSwapInt(var1, var2, var5, var5 + var4));
    return var5;
}
// native method, implemented in C++
public final native boolean compareAndSwapInt(Object var1, long var2, int var4, int var5);
jdk8u: unsafe.cpp ==> row_5
cmpxchg = compare and exchange/set/swap
// Quoted from jdk8u unsafe.cpp: native side of compareAndSwapInt.
// e is the expected value, x the new value.
UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x))
  UnsafeWrapper("Unsafe_CompareAndSwapInt");
  oop p = JNIHandles::resolve(obj);
  // Address of the int field: object p plus the field offset.
  jint* addr = (jint *) index_oop_from_field_offset_long(p, offset);
  // Reports success when the value returned by cmpxchg equals the expected value e.
  return (jint)(Atomic::cmpxchg(x, addr, e)) == e;
UNSAFE_END
jdk8u: atomic_linux_x86.inline.hpp ==> row_3
is_MP = Multi Processors 是否多核
// Quoted from jdk8u atomic_linux_x86.inline.hpp.
// Executes cmpxchgl on *dest with compare_value in the accumulator ("a" constraint);
// LOCK_IF_MP(%4) emits the lock prefix only when mp (operand 4) is non-zero.
// The accumulator content after the instruction is returned through exchange_value.
inline jint     Atomic::cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value) {
  int mp = os::is_MP();
  __asm__ volatile (LOCK_IF_MP(%4) "cmpxchgl %1,(%3)"
                    : "=a" (exchange_value)
                    : "r" (exchange_value), "a" (compare_value), "r" (dest), "r" (mp)
                    : "cc", "memory");
  return exchange_value;
}
jdk8u: os.hpp ==> is_MP()
// Quoted from jdk8u os.hpp.
// True unless exactly one processor has been counted; the AssumeMP flag forces true.
static inline bool is_MP() {
  // During bootstrap if _processor_count is not yet initialized
  // we claim to be MP as that is safest. If any platform has a
  // stub generator that might be triggered in this phase and for
  // which being declared MP when in fact not, is a problem - then
  // the bootstrap routine for the stub generator needs to check
  // the processor count directly and leave the bootstrap routine
  // in place until called after initialization has ocurred.
  return (_processor_count != 1) || AssumeMP;
}
jdk8u: atomic_linux_x86.inline.hpp
// Assembly prefix: compares mp with 0 and jumps over the `lock` prefix to label 1 when equal,
// so the following instruction is only lock-prefixed when mp is non-zero.
#define LOCK_IF_MP(mp) "cmp $0, " #mp "; je 1f; lock; 1: "
- CPU级别指令,最终实现
- cmpxchg => CAS修改变量值,指令本身不是原子操作,多核下需要加`lock`前缀
- lock指令在执行的时候视情况采用缓存锁或者总线锁
- lock单核不用加
lock cmpxchg 指令
4. unSafe
// Overview of Unsafe capabilities (arguments omitted; illustrative only, not compilable as written).
// Direct memory management
Unsafe.getUnsafe().allocateMemory();
Unsafe.getUnsafe().freeMemory();
// Direct instance creation
Unsafe.getUnsafe().allocateInstance();
// Direct access to class/field data
Unsafe.getUnsafe().getInt();
// CAS operations
Unsafe.getUnsafe().compareAndSwapObject();
/**
 * Obtains the Unsafe singleton through reflection and uses it to look up a field offset
 * and perform two CAS operations on that field.
 */
@Slf4j
public class T04_Unsafe {
    public static void main(String[] args) throws Exception {
        // Look the singleton field up by name: getDeclaredFields() returns its elements
        // in no particular order, so indexing [0] is not guaranteed to hit this field.
        Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
        unsafeField.setAccessible(true);
        // Static field, hence the null receiver.
        Unsafe unsafe = (Unsafe) unsafeField.get(null);

        // Offset of field i inside instances of A.
        Field f = A.class.getDeclaredField("i");
        long offset = unsafe.objectFieldOffset(f);
        System.out.println("offset = " + offset);

        final A a = new A();
        // CAS: a.i starts at 0, so 0 ==> 1 is attempted against a matching value.
        log.debug("CAS 0 ==> 1: {}", unsafe.compareAndSwapInt(a, offset, 0, 1));
        log.debug("a.i: {}", a.i);
        // Second CAS again expects 0, after the first CAS has already been attempted.
        log.debug("CAS 0 ==> 2: {}", unsafe.compareAndSwapInt(a, offset, 0, 2));
        log.debug("a.i: {}", a.i);

        // Allocate memory
        // unsafe.allocateMemory();
        // Free memory
        // unsafe.freeMemory();
    }
}
/** Holder with a single int field i, the target of the Unsafe offset lookup and CAS calls. */
class A {
    // Starts at 0, which is the value both CAS calls expect.
    int i = 0;
}
3. Atomic
1. AtomicInteger
- jdk提供一些常见原子操作类(CAS)
- 3. Hotspot理解CAS
2. LongAdder
/*
 * time: LongAdder < Atomic < Sync (actual timings must be judged case by case)
 *   LongAdder: segmented lock
 *   Atomic:    CAS
 *   Sync:      pessimistic lock
 */
public class T2_LongAdder {
    static long count = 0L;
    static AtomicLong atomicLong_count = new AtomicLong(0L);
    static LongAdder longAdder_count = new LongAdder();
    static final int loop = 10_000;
    static final int threadSize = 1_000; // the larger the thread count, the bigger LongAdder's advantage

    /**
     * Runs the same workload (threadSize threads, loop increments each) three times: with
     * AtomicLong, with a synchronized block, and with LongAdder. Prints each final count
     * and the elapsed milliseconds.
     */
    public static void main(String[] args) throws Exception {
        Thread[] threads = new Thread[threadSize];

        // --- AtomicLong ---
        for (int i = 0; i < threads.length; i++) {
            threads[i] = new Thread(() -> {
                for (int k = 0; k < loop; k++) {
                    atomicLong_count.incrementAndGet();
                }
            });
        }
        long start = System.currentTimeMillis();
        ThreadHelper.start(threads);
        ThreadHelper.join(threads);
        System.out.println("Atomic: " + atomicLong_count.get() + ", time: " + (System.currentTimeMillis() - start));

        // --- synchronized ---
        final Object lock = new Object();
        for (int i = 0; i < threads.length; i++) {
            threads[i] = new Thread(() -> {
                for (int k = 0; k < loop; k++) {
                    synchronized (lock) {
                        count++;
                    }
                }
            });
        }
        start = System.currentTimeMillis();
        ThreadHelper.start(threads);
        ThreadHelper.join(threads);
        System.out.println("Sync: " + count + ", time: " + (System.currentTimeMillis() - start));

        // --- LongAdder ---
        for (int i = 0; i < threads.length; i++) {
            threads[i] = new Thread(() -> {
                for (int k = 0; k < loop; k++) {
                    longAdder_count.increment();
                }
            });
        }
        start = System.currentTimeMillis();
        ThreadHelper.start(threads);
        ThreadHelper.join(threads);
        // Report the LongAdder's own total; the original printed atomicLong_count here,
        // so the LongAdder result was never actually shown.
        System.out.println("LongAdder: " + longAdder_count.longValue() + ", time: " + (System.currentTimeMillis() - start));
    }
}
4. synchronized
锁不仅仅是关于同步与互斥的,也是关于内存可见的。为了保证所有线程都能够看到共享的,可变变量的最新值,读取和写入线程必须使用公共的锁进行同步
- 保障原子性,并发序列化
- 保障可见性,解锁时刷新内存
- 和有序性无关,有序性只有一个要求,单线程最终一致性。和锁没有关系
1. Concept
/**
 * synchronized(...) acquires the lock of a specific object.
 */
public class T1_Sync {
    private static int count = 10;
    final Object o = new Object();

    /** Prints the current thread's name together with the current count. */
    private static void report() {
        String who = Thread.currentThread().getName();
        System.out.println(who + " count = " + count);
    }

    /**
     * 1. Any thread running the block below must first obtain the lock of o.
     * 2. Any object may be used as the lock.
     * 3. Essentially the object's markword points at the owning thread.
     * 4. Do not use String constants or boxed Integer / Long values as the lock object.
     */
    public void v1() {
        synchronized (o) {
            count -= 1;
            report();
        }
    }

    /**
     * Creating a dedicated lock object every time is tedious: lock on this instead.
     */
    public void v2() {
        synchronized (this) {
            count -= 1;
            report();
        }
    }

    /**
     * Equivalent to synchronized (this).
     */
    public synchronized void v3() {
        count -= 1;
        report();
    }

    /**
     * Every .class file loaded into memory produces a corresponding Class object, which
     * is used as the lock here. This variant decrements without printing.
     */
    public static void v4() {
        synchronized (T1_Sync.class) {
            count -= 1;
        }
    }

    /**
     * Equivalent to synchronized (T1_Sync.class).
     */
    public static synchronized void v5() {
        count -= 1;
        report();
    }
}
2. 验证锁的问题
/*
 * Interview question: simulate a bank account.
 * Is it acceptable to lock the business write method but not the business read method?
 *
 * It easily produces a dirty read (dirtyRead).
 * Per the notes, locking is about a hundred times slower.
 */
@ToString
public class T1_DirtyRead {
    String name;
    double balance;

    /** Writes name first, sleeps two seconds, then writes balance, all under this's lock. */
    public synchronized void set(String name, double balance) {
        this.name = name;
        ThreadHelper.sleepSeconds(2);
        this.balance = balance;
    }

    /** Unsynchronized read: can observe the account between the two writes of set(). */
    public T1_DirtyRead get() {
        return this;
    }

    /**
     * getSync() and set() both lock on this, so this read waits for a running set().
     */
    public synchronized T1_DirtyRead getSync() {
        return this;
    }

    // Expected output:
    // T1_DirtyRead(name=zhangsan, balance=0.0)
    // T1_DirtyRead(name=zhangsan, balance=100.0)
    @Test
    public void T1_async() {
        T1_DirtyRead account = new T1_DirtyRead();
        Runnable update = () -> account.set("zhangsan", 100.0);
        Thread writer = new Thread(update);
        writer.start();

        ThreadHelper.sleepSeconds(1);
        T1_DirtyRead midUpdate = account.get(); // dirty_read
        System.out.println(midUpdate);

        ThreadHelper.sleepSeconds(2);
        T1_DirtyRead afterUpdate = account.get();
        System.out.println(afterUpdate);

        ThreadHelper.join(writer);
    }

    @Test
    public void T2_sync() {
        T1_DirtyRead account = new T1_DirtyRead();
        Runnable update = () -> account.set("zhangsan", 100.0);
        Thread writer = new Thread(update);
        writer.start();

        ThreadHelper.sleepSeconds(1);
        T1_DirtyRead consistent = account.getSync();
        System.out.println(consistent);

        ThreadHelper.join(writer);
    }
}
/**
 * Atomicity demo: 100 threads each increment a shared counter 10_000 times, first with
 * no synchronization, then under synchronized, then under a ReentrantLock.
 */
public class T01_IPP {
    private static long n = 0L;

    /*
     * 1. 100 threads increment one variable 10_000 times each.
     *    One thread reads n into a register and increments it but has not written it back
     *    yet; another thread reads the same old value.
     *
     * race condition => multiple threads compete while accessing shared data
     * inconsistency  => an unexpected result produced by concurrent access
     */
    @Test
    public void v1() throws Exception {
        incrementConcurrently(() -> n++);
    }

    /*
     * 2. synchronized
     *    The statements inside synchronized(){} are executed as one uninterruptible unit.
     *
     * The essence of locking: serializing concurrent execution.
     */
    @Test
    public void v2() throws Exception {
        incrementConcurrently(() -> {
            // 1. Guarantees atomicity: concurrent increments are serialized.
            // 2. Guarantees visibility: main memory is synchronized when the block ends.
            // 3. Unrelated to ordering: ordering only requires single-thread consistency
            //    (as-if-serial) and has nothing to do with the lock.
            synchronized (T01_IPP.class) {
                n++;
            }
        });
    }

    /**
     * 3. ReentrantLock
     */
    @Test
    public void v3() throws Exception {
        Lock lock = new ReentrantLock();
        incrementConcurrently(() -> {
            lock.lock();
            try {
                n++;
            } finally {
                // Release in finally so a failure inside the critical section can never
                // leave the lock held and block the other threads.
                lock.unlock();
            }
        });
    }

    /**
     * Runs {@code incrementOnce} 10_000 times on each of 100 threads, waits for all of
     * them and prints the final value of n.
     *
     * @param incrementOnce a single increment of n, with whatever locking the caller wants
     * @throws InterruptedException if interrupted while waiting for the workers
     */
    private static void incrementConcurrently(Runnable incrementOnce) throws InterruptedException {
        // n is static, so without this reset each test would print a total that includes
        // the increments of whichever tests ran before it in the same JVM.
        n = 0L;
        Thread[] threads = new Thread[100];
        CountDownLatch latch = new CountDownLatch(threads.length);
        for (int i = 0; i < threads.length; i++) {
            threads[i] = new Thread(() -> {
                for (int j = 0; j < 10_000; j++) {
                    incrementOnce.run();
                }
                latch.countDown();
            });
        }
        ThreadHelper.start(threads);
        latch.await();
        System.out.println(n);
    }
}
3. Reentrant
`synchronized`是可重入锁。`a()`调用`b()`,a、b都请求同一把锁,需重入。重入次数、解锁次数必须对应
- 重入记录方式
- 偏向锁、自旋锁:线程栈里,每重入一次,LR++。LR出栈即解锁
- 轻量级锁:同上
- 重量级锁:记录在objectMonitor字段上,经过OS
/** Demonstrates that synchronized is reentrant. */
public class T2_Sync_Reentrant {

    /** Holds this's lock, then calls m2(), which requests the same lock again. */
    synchronized void m1() {
        System.out.println("m1 start");
        ThreadHelper.sleepSeconds(1);
        // lock_reentrant: same thread, same lock object
        this.m2();
        System.out.println("m1 end");
    }

    synchronized void m2() {
        ThreadHelper.sleepSeconds(2);
        System.out.println("m2");
    }

    /*
     * 1. Reentrancy of sync
     *    1. One synchronized method calls another synchronized method.
     *    2. A thread that already owns an object's lock requests it again: lock++.
     */
    public static void main(String[] args) {
        T2_Sync_Reentrant parent = new T2_Sync_Reentrant();
        parent.m1();

        System.out.println("————————————————————");

        Child child = new Child();
        child.m2();
    }
}
/**
 * 2. With `extends`, a subclass overriding a synchronized method takes the lock on the
 *    subclass instance.
 */
class Child extends T2_Sync_Reentrant {

    @Override
    synchronized void m2() {
        final String tag = "child m2";
        System.out.println(tag + " start");
        // The lock taken by the super call is on this same Child object.
        super.m2();
        System.out.println(tag + " end");
    }
}
4. Exception
- 抛出异常,默认锁被释放
- 其他线程会进入同步代码区,有可能会访问到异常产生时的数据
/**
 * Handle exceptions inside synchronized business logic with care:
 * 1. When an exception is thrown, the lock is released by default.
 * 2. Other threads then enter the synchronized region and may see the data as it was
 *    when the exception occurred.
 */
@Slf4j
public class T3_Sync_Exception {
    int count = 0;

    /** Counts up once per second under this's lock and throws when count reaches 5. */
    void m() {
        log.error("start");
        synchronized (this) {
            for (;;) {
                count += 1;
                log.error("count = {}", count);
                ThreadHelper.sleepSeconds(1);
                if (count != 5) {
                    continue;
                }
                // 1. Throwing here leaves the synchronized block, which releases the lock.
                throw new ArithmeticException();
            }
        }
    }

    public static void main(String[] args) {
        T3_Sync_Exception shared = new T3_Sync_Exception();
        Thread first = new Thread(shared::m, "t1");
        first.start();
        ThreadHelper.sleepSeconds(2);
        // 2. t2 could never have entered; t1's exception releases the lock, t2 gets in
        //    and continues from t1's data.
        Thread second = new Thread(shared::m, "t2");
        second.start();
    }
}
5. Optimize
- 锁粒度适中
- sync对象final
- sync对象不能为字符串常量
- 例如:用到了一个类库,该类库的代码中锁定了字符串`hello`,却读不到源码;自己的代码若也锁定字符串常量`hello`,就会和类库锁定同一个对象
/*
 * 1. Keep lock granularity moderate.
 * 2. The lock object should be final.
 * 3. The lock object must not be a String constant.
 */
public class T4_Sync_Optimize {
    int count = 0;

    /*
     * 1. Moderate lock granularity
     *    Finer: the fewer statements inside the synchronized block, the better.
     *    Coarser: merge blocks when they have become too fine.
     */
    synchronized void m1() {
        ThreadHelper.sleepSeconds(2);
        // Only this statement needs the lock, so locking the whole method is excessive.
        count += 1;
        ThreadHelper.sleepSeconds(2);
    }

    /** Same work as m1, but only the increment is inside the lock. */
    void m2() {
        ThreadHelper.sleepSeconds(2);
        synchronized (this) {
            count += 1;
        }
        ThreadHelper.sleepSeconds(2);
    }

    // --------------------------------------------------------------------------------
    // Deliberately not final for the demo; careful authors declare lock objects final.
    /*final*/ Object o = new Object();

    /** Holds o's lock forever, printing the current thread's name once per second. */
    void m3() {
        synchronized (o) {
            for (;;) {
                ThreadHelper.sleepSeconds(1);
                System.out.println(Thread.currentThread().getName());
            }
        }
    }

    /*
     * 2. Lock objects should be final.
     *    1. Changing a field of o does not affect the lock.
     *    2. Pointing o at another object changes which object is locked.
     */
    @Test
    public void T1_final() {
        T4_Sync_Optimize target = new T4_Sync_Optimize();
        Thread first = new Thread(target::m3, "t1");
        first.start();
        ThreadHelper.sleepSeconds(3);
        Thread second = new Thread(target::m3, "t2");
        // The lock object is replaced, so t2 can run; without this line t2 would never
        // get a chance to execute.
        target.o = new Object();
        second.start();
        ThreadHelper.sleepSeconds(2);
    }

    // --------------------------------------------------------------------------------
    /*
     * 3. String constants must not be used as lock objects.
     *    e.g. m4 and m5 lock the same object.
     */
    String s1 = "Hello";
    String s2 = "Hello";

    /** Takes s1's lock and then parks while still holding it. */
    void m4() {
        synchronized (s1) {
            System.out.println("m4");
            LockSupport.park();
        }
    }

    void m5() {
        synchronized (s2) {
            System.out.println("m5");
        }
    }

    /*
     * m5 cannot obtain the lock.
     */
    @Test
    public void T2_String() {
        T4_Sync_Optimize target = new T4_Sync_Optimize();
        Thread holder = new Thread(target::m4, "t1");
        holder.start();
        ThreadHelper.sleepSeconds(1);
        Thread contender = new Thread(target::m5, "t2");
        contender.start();
        ThreadHelper.join(contender);
    }
}
6. 用户态、内核态
- 内核态:执行在内核,能够访问所有指令
- 用户态:只能访问用户权限内的指令
- eg:网卡的内容,显卡的内容……访问不了
- JDK早期,`synchronized`叫做重量级锁。JVM(用户态)申请锁资源必须通过kernel(内核态),即系统调用
- 汇编语言示例:用户态通过系统调用进入内核态
;hello.asm
;Prints "Hello\n" and exits, using 32-bit int 0x80 system calls.
;write(int fd, const void *buffer, size_t nbytes)
section .data                   ; standard data section name, matching `section .text` below
    msg db "Hello", 0xA         ; message bytes followed by a newline
    len equ $ - msg             ; message length, computed by the assembler
section .text
global _start
_start:
    mov edx, len                ; nbytes
    mov ecx, msg                ; buffer
    mov ebx, 1                  ; file descriptor 1: std_out
    mov eax, 4                  ; system call number 4: write
    int 0x80                    ; trap from user mode into the kernel
    mov ebx, 0                  ; exit status 0
    mov eax, 1                  ; system call number 1: exit
    int 0x80
7. JOL
- Java_Object_Layout
- 对象的内存布局(HotSpot实现),8B对齐
- markword(8B)
- classPointer(4B)。类型指针,指向`obj.class`
- 成员变量
- 对齐。8B整数倍
1. Tool
<dependency>
<groupId>org.openjdk.jol</groupId>
<artifactId>jol-core</artifactId>
<version>0.9</version>
</dependency>
- 一定要JDK8,JDK11中`new Object()`就为匿名偏向
- synchronized原理
- 打印出的内容,按字节逆序
00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000001
/**
 * JOL (java_object_layout).
 * The in-memory layout of a Java object; it depends on the JDK version.
 */
public class T6_JOL {

    /** Prints the JOL layout of the given object. */
    private static void dump(Object target) {
        System.out.println(ClassLayout.parseInstance(target).toPrintable());
    }

    /*
     * 1. A freshly created object: no lock.
     *
     * java.lang.Object object internals:
     *  OFFSET  SIZE   TYPE DESCRIPTION                               VALUE
     *       0     4        (object header)                           01 00 00 00 (00000001 00000000 00000000 00000000) (1)
     *       4     4        (object header)                           00 00 00 00 (00000000 00000000 00000000 00000000) (0)
     *       8     4        (object header)                           28 0f 00 00 (00101000 00001111 00000000 00000000) (3880)
     *      12     4        (loss due to the next object alignment)
     * Instance size: 16 bytes
     * Space losses: 0 bytes internal + 4 bytes external = 4 bytes total
     */
    @Test
    public void new_obj() {
        dump(new Object());
    }

    /*
     * 2. An object inside synchronized(): lightweight lock, 000.
     *
     * java.lang.Object object internals:
     *  OFFSET  SIZE   TYPE DESCRIPTION                               VALUE
     *       0     4        (object header)                           50 7f 94 6d (01010000 01111111 10010100 01101101) (1838448464)
     *       4     4        (object header)                           01 00 00 00 (00000001 00000000 00000000 00000000) (1)
     *       8     4        (object header)                           28 0f 00 00 (00101000 00001111 00000000 00000000) (3880)
     *      12     4        (loss due to the next object alignment)
     * Instance size: 16 bytes
     * Space losses: 0 bytes internal + 4 bytes external = 4 bytes total
     */
    @Test
    public void spinlock() {
        Object monitor = new Object();
        synchronized (monitor) {
            dump(monitor);
        }
    }

    /*
     * 3. An object created after sleepSeconds(5): anonymous biased 101 => biased 101 (BiasedLock).
     *    Alternatively set -XX:BiasedLockingStartupDelay=0 to remove the biasing delay.
     *
     * java.lang.Object object internals:
     *  OFFSET  SIZE   TYPE DESCRIPTION                               VALUE
     *       0     4        (object header)                           05 00 00 00 (00000101 00000000 00000000 00000000) (5)
     *       4     4        (object header)                           00 00 00 00 (00000000 00000000 00000000 00000000) (0)
     *       8     4        (object header)                           28 0f 00 00 (00101000 00001111 00000000 00000000) (3880)
     *      12     4        (loss due to the next object alignment)
     * Instance size: 16 bytes
     * Space losses: 0 bytes internal + 4 bytes external = 4 bytes total
     *
     * java.lang.Object object internals:
     *  OFFSET  SIZE   TYPE DESCRIPTION                               VALUE
     *       0     4        (object header)                           05 90 80 5b (00000101 10010000 10000000 01011011) (1535152133)
     *       4     4        (object header)                           01 00 00 00 (00000001 00000000 00000000 00000000) (1)
     *       8     4        (object header)                           28 0f 00 00 (00101000 00001111 00000000 00000000) (3880)
     *      12     4        (loss due to the next object alignment)
     * Instance size: 16 bytes
     * Space losses: 0 bytes internal + 4 bytes external = 4 bytes total
     */
    @Test
    public void biasedLock() {
        ThreadHelper.sleepSeconds(5);
        Object monitor = new Object();
        // First dump: before any thread has locked the object.
        dump(monitor);
        synchronized (monitor) {
            // Second dump: while the current thread holds the lock.
            dump(monitor);
        }
    }
}
2. markword
- markword信息
- GC信息
- hashCode(identity_hashCode)
jdk8u: markOop.hpp
// Bit-format of an object header (most significant first, big endian layout below):
//
// 32 bits:
// --------
// hash:25 ------------>| age:4 biased_lock:1 lock:2 (normal object)
// JavaThread*:23 epoch:2 age:4 biased_lock:1 lock:2 (biased object)
// size:32 ------------------------------------------>| (CMS free block)
// PromotedObject*:29 ---------->| promo_bits:3 ----->| (CMS promoted object)
//
// 64 bits:
// --------
// unused:25 hash:31 -->| unused:1 age:4 biased_lock:1 lock:2 (normal object)
// JavaThread*:54 epoch:2 unused:1 age:4 biased_lock:1 lock:2 (biased object)
// PromotedObject*:61 --------------------->| promo_bits:3 ----->| (CMS promoted object)
// size:64 ----------------------------------------------------->| (CMS free block)
//
// unused:25 hash:31 -->| cms_free:1 age:4 biased_lock:1 lock:2 (COOPs && normal object)
// JavaThread*:54 epoch:2 cms_free:1 age:4 biased_lock:1 lock:2 (COOPs && biased object)
// narrowOop:32 unused:24 cms_free:1 unused:4 promo_bits:3 ----->| (COOPs && CMS promoted object)
// unused:21 size:35 -->| cms_free:1 unused:7 ------------------>| (COOPs && CMS free block)
- biased_lock:Obj是否启用偏向锁标记,只占1bit
biased_lock | lock | 状态 |
---|---|---|
0 | 01 | 无锁 |
1 | 01 | 偏向锁 |
0 | 00 | 轻量级锁 |
0 | 10 | 重量级锁 |
0 | 11 | GC标记 |
8. promotion
new
=> 《普通对象》synchronized
=> 《偏向锁》
- 轻度竞争 => 乐观锁(自旋锁、轻量级锁、无锁)
- 重度竞争 => 悲观锁(重量级锁)
1. 用户空间锁, 重量级锁
- 用户空间锁:不需要经过OS(偏向锁,轻量级锁)
- 重量级锁:需要向kernel申请
2. 偏向锁
- 多数的代码段实际以单线程运行,没必要设置竞争机制。所以先设置偏向锁
- 将thread_id设置到markword中(54bit)
// 其中的方法用synchronized修饰,保证线程安全
StringBuffer
1. 启动
- 默认synchronized(o),00 => 轻量级锁
- 默认情况,偏向锁有个时延,默认是4s
- JVM有一些默认启动线程,有好多sync代码。启动时肯定会有竞争(明确)。如果使用偏向锁,就会造成偏向锁不断的进行锁撤销和锁升级的操作,效率较低
# BiasedLocking启动延迟设置
-XX:BiasedLockingStartupDelay=0
2. 匿名偏向锁, 普通对象
// 101后面会记录偏向锁的线程指针,刚开始没有偏向任何一个线程,所以叫做匿名偏向
00000001 00000000 00000000 00000000 // 普通对象
00000101 00000000 00000000 00000000 // 匿名偏向锁
00001101 10001000 10000000 00110100 // 偏向锁
3. 自旋锁
- LR(lock_record):锁记录
- 每个线程都有自己的线程栈
- 当有竞争时,两个线程栈中生成LR,两个线程用自旋的方式,将LR更新到markword
- LR备份hashCode(displaced_markword),LR从栈弹出来解锁
4. 重量级锁
- markword记录ObjectMonitor(jvm空间的一个cpp对象),这个对象内部访问的时候通过OS,拿到OS对应的锁
- 升级重量级锁:向OS申请资源,linux_mutex,CPU从3级-0级系统调用,线程挂起,进入等待队列,等待OS的调度,然后再映射回用户空间
1. 何时升级重量级锁
- 1.6及以前。竞争加剧:有线程超过10次自旋(由-XX:PreBlockSpin控制), 或者自旋线程数超过CPU核数的一半
- 1.6之后。加入自适应自旋(Adaptive Self Spinning), JVM自己控制
2. why自旋锁, 重量级锁
- 自旋消耗CPU资源的,锁的时间长,或者自旋thread多,CPU被大量消耗
- 重量级锁有等待队列,拿不到锁的thread进入等待队列,不需要消耗CPU
3. 等待队列
- 悲观锁(ObjectMonitor)里有各种队列,contentionList、entryList、waitSet,分别用于竞争、等待获取锁、wait()等待
- C++代码,竞争激烈时,拿不到锁的线程被挂起放入contentionList/entryList中(调用wait()的线程放入waitSet),等待OS调度唤醒
5. 横切面详解
1. java源码 & Bytecode
synchronized
自动上锁,自动释放锁
// Example whose compiled bytecode is listed right after it: the synchronized
// block is what produces the monitorenter / monitorexit instructions.
@Test
public void spinlock() {
Object o = new Object();
// Entering the block acquires o's monitor; leaving it, normally or through an
// exception, releases it (hence the two monitorexit instructions in the listing).
synchronized (o) {
// Print o's layout via JOL ClassLayout while the monitor is held.
System.out.println(ClassLayout.parseInstance(o).toPrintable());
}
}
0 new #2 <java/lang/Object>
3 dup
4 invokespecial #1 <java/lang/Object.<init> : ()V>
7 astore_1
8 aload_1
9 dup
10 astore_2
11 monitorenter # => 1.
12 getstatic #3 <java/lang/System.out : Ljava/io/PrintStream;>
15 aload_1
16 invokestatic #4 <org/openjdk/jol/info/ClassLayout.parseInstance : (Ljava/lang/Object;)Lorg/openjdk/jol/info/ClassLayout;>
19 invokevirtual #5 <org/openjdk/jol/info/ClassLayout.toPrintable : ()Ljava/lang/String;>
22 invokevirtual #6 <java/io/PrintStream.println : (Ljava/lang/String;)V>
25 aload_2
26 monitorexit # => 2. jvm汇编,第二个monitorexit是遇到异常解锁
27 goto 35 (+8)
30 astore_3
31 aload_2
32 monitorexit # => 3.
33 aload_3
34 athrow
35 return
2. Hotspot
InterpreterRuntime.cpp
monitorenter() => fast_enter()
// Interpreter runtime entry for monitorenter: acquires the lock on elem->obj(),
// passing elem->lock() as the BasicLock to ObjectSynchronizer.
IRT_ENTRY_NO_ASYNC(void, InterpreterRuntime::monitorenter(JavaThread* thread, BasicObjectLock* elem))
#ifdef ASSERT
// Only when ASSERT is defined: verify elem against the thread's last interpreter frame.
thread->last_frame().interpreter_frame_verify_monitor(elem);
#endif
if (PrintBiasedLockingStatistics) {
// Statistics only: bump the biased-locking slow-path entry counter.
Atomic::inc(BiasedLocking::slow_path_entry_count_addr());
}
// Wrap the object to be locked in a Handle for the calls below.
Handle h_obj(thread, elem->obj());
assert(Universe::heap()->is_in_reserved_or_null(h_obj()),
"must be NULL or an object");
if (UseBiasedLocking) { // biased locking enabled
// Retry fast entry if bias is revoked to avoid unnecessary inflation
ObjectSynchronizer::fast_enter(h_obj, elem->lock(), true, CHECK);
} else {
// Biased locking disabled: go straight to slow_enter().
ObjectSynchronizer::slow_enter(h_obj, elem->lock(), CHECK);
}
assert(Universe::heap()->is_in_reserved_or_null(elem->obj()),
"must be NULL or an object");
#ifdef ASSERT
// Same frame/monitor verification as on entry, repeated after locking.
thread->last_frame().interpreter_frame_verify_monitor(elem);
#endif
IRT_END
synchronizer.cpp
fast_enter() => revoke_and_rebias()
// Biased-locking entry path: first revokes (and, if attempt_rebias is set, tries
// to rebias) obj's bias, then falls back to slow_enter() unless the object was
// revoked and rebiased.
void ObjectSynchronizer::fast_enter(Handle obj, BasicLock* lock, bool attempt_rebias, TRAPS) {
if (UseBiasedLocking) {
if (!SafepointSynchronize::is_at_safepoint()) {
// Not at a safepoint: revoke the bias, rebiasing when attempt_rebias allows it.
BiasedLocking::Condition cond = BiasedLocking::revoke_and_rebias(obj, attempt_rebias, THREAD);
if (cond == BiasedLocking::BIAS_REVOKED_AND_REBIASED) {
// Revoked and rebiased: nothing more to do here
// (presumably the bias now points at THREAD -- confirm in biasedLocking.cpp).
return;
}
} else {
// At a safepoint only revocation is performed; callers must not request a rebias.
assert(!attempt_rebias, "can not rebias toward VM thread");
BiasedLocking::revoke_at_safepoint(obj);
}
// From here on the mark word must no longer carry the bias pattern.
assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now");
}
// Continue on the non-biased locking path.
slow_enter (obj, lock, THREAD) ;
}
slow_enter() => cmpxchg_ptr() 尝试自旋锁 => inflate() 膨胀为重量级锁
// Non-biased entry path. Tries to stack-lock obj by CAS-ing a pointer to `lock`
// into the mark word, handles re-entry by the thread that already owns the
// stack lock, and otherwise inflates to a monitor and enters it.
void ObjectSynchronizer::slow_enter(Handle obj, BasicLock* lock, TRAPS) {
markOop mark = obj->mark();
assert(!mark->has_bias_pattern(), "should not see bias pattern here");
if (mark->is_neutral()) {
// Anticipate successful CAS -- the ST of the displaced mark must
// be visible <= the ST performed by the CAS.
lock->set_displaced_header(mark);
// CAS the mark word from the saved value to `lock`; getting the old value back means success.
if (mark == (markOop) Atomic::cmpxchg_ptr(lock, obj()->mark_addr(), mark)) {
TEVENT (slow_enter: release stacklock) ;
return ;
}
// Fall through to inflate() ...
} else
// Mark word already points at a BasicLock owned by the current thread: re-entrant
// acquisition, recorded by storing a NULL displaced header in this BasicLock.
if (mark->has_locker() && THREAD->is_lock_owned((address)mark->locker())) {
assert(lock != mark->locker(), "must not re-lock the same lock");
assert(lock != (BasicLock*)obj->mark(), "don't relock with same BasicLock");
lock->set_displaced_header(NULL);
return;
}
#if 0
// The following optimization isn't particularly useful.
if (mark->has_monitor() && mark->monitor()->is_entered(THREAD)) {
lock->set_displaced_header (NULL) ;
return ;
}
#endif
// The object header will never be displaced to this lock,
// so it does not matter what the value is, except that it
// must be non-zero to avoid looking like a re-entrant lock,
// and must not look locked either.
lock->set_displaced_header(markOopDesc::unused_mark());
// Neither the CAS nor the re-entrant case applied: inflate the lock and enter the resulting monitor.
ObjectSynchronizer::inflate(THREAD, obj())->enter(THREAD);
}
7. 批量重偏向, 批量撤销
关于epoch: (不重要)
批量重偏向与批量撤销:
- 从偏向锁的加锁解锁过程中可看出,当只有一个线程反复进入同步块时,偏向锁带来的性能开销基本可以忽略
- 当有其他线程尝试获得锁时,就需要等到safe point,再将偏向锁撤销为无锁状态或升级为轻量级锁,会消耗一定的性能,所以在多线程竞争频繁的情况下,偏向锁不仅不能提高性能,还会导致性能下降。于是,就有了批量重偏向与批量撤销的机制
原理:以class为单位
- 批量重偏向(bulk_rebias):一个线程创建了大量Obj并执行了初始的同步操作,后来另一个线程也来将这些Obj作为锁Obj进行操作,会导致大量的偏向锁撤销操作
- 批量撤销(bulk_revoke):在明显多线程竞争剧烈的场景下使用偏向锁是不合适的
一个偏向锁撤销计数器,每一次该class的Obj发生偏向撤销操作时,该计数器+1,当这个值达到重偏向阈值(默认20)时,JVM就认为该class的偏向锁有问题,因此会进行批量重偏向。每个class对象会有一个对应的epoch字段,每个处于偏向锁状态对象的MarkWord中也有该字段,其初始值为创建该对象时class中的epoch的值。每次发生批量重偏向时,就将该值+1,同时遍历JVM中所有线程的栈,找到该class所有正处于加锁状态的偏向锁,将其epoch字段改为新值。下次获得锁时,发现当前对象的epoch值和class的epoch不相等,那就算当前已经偏向了其他线程,也不会执行撤销操作,而是直接通过CAS操作将其MarkWord的Thread_Id 改成当前线程Id。当达到重偏向阈值后,假设该class计数器继续增长,当其达到批量撤销的阈值后(默认40),JVM就认为该class的使用场景存在多线程竞争,会标记该class为不可偏向,之后,对于该class的锁,直接走轻量级锁的逻辑