多线程局部存储技术

问题

多线程上下文中，每个线程需要使用一个专属的全局变量，该如何实现？

代码示例

一种可能的解决方案

test1.c

#define _GNU_SOURCE     /* To get pthread_getattr_np() declaration */
#define _XOPEN_SOURCE >= 500 || _POSIX_C_SOURCE >= 200809L
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <memory.h>
#include <semaphore.h>

/*
 * Accessor macros that let per-thread state be written as if it were a set
 * of plain globals. Each one expands to a member access through a pointer
 * named `global`, so they only compile where such a pointer is in scope.
 */
#define g_value (global->value)
#define g_fl    (global->fl)
#define g_str   (global->str)

/*
 * Call-site wrappers: forward the in-scope `global` pointer as the hidden
 * first argument of the matching *_ function.
 */
#define func1(i, j)      func1_(global, i, j)
#define func2(s)         func2_(global, s)
#define more_func_call() more_func_call_(global)

typedef struct
{
    int value;
    float fl;
    char* str;
} ThreadGlobal;

/*
 * Store the quotient i / j in the `fl` member of the given context.
 *
 * global: context to update (must not be NULL).
 * i, j:   dividend and divisor; the division is carried out in double
 *         precision and the result is narrowed to float on assignment.
 *
 * A zero divisor stores 0 instead of dividing. child_thread passes
 * rand() % 10 as the divisor, so 0 is a value this function really receives.
 */
void func1_(ThreadGlobal* global, int i, int j)
{
    if (j == 0)
    {
        g_fl = 0;
        return;
    }

    g_fl = 1.0 * i / j;
}

/*
 * Point the `str` member of the given context at s.
 * Only the pointer is stored; the characters are not copied.
 */
void func2_(ThreadGlobal* global, char* s)
{
    global->str = s;
}

/*
 * Print the three members of the given context to stdout, one per line.
 *
 * global: context to print (must not be NULL).
 */
void more_func_call_(ThreadGlobal* global)
{
    /* Passing NULL for %s is undefined behavior, so substitute a placeholder. */
    const char* text = (g_str != NULL) ? g_str : "(null)";

    /* value is an int; cast so the argument type matches the %u conversion. */
    printf("g_value = %u\n", (unsigned int)g_value);
    printf("g_fl = %f\n", g_fl);
    printf("g_str = %s\n", text);
}

/*
 * Thread entry point: initializes its private context, runs the example
 * calls through the macro wrappers and prints the result.
 *
 * arg: pointer to a heap-allocated ThreadGlobal (main passes the result of
 *      malloc). The thread takes ownership of it and frees it before
 *      returning. A NULL arg (failed allocation) makes the thread exit
 *      immediately instead of dereferencing it.
 *
 * Always returns NULL.
 */
void* child_thread(void* arg)
{
    ThreadGlobal* global = arg;

    if (global == NULL)
    {
        return NULL;
    }

    /* global variable initialization */
    /* NOTE(review): assumes pthread_t converts to int; the value is only printed as a tag. */
    g_value = pthread_self();
    g_fl = 0;
    g_str = "Delphi Tang";

    /* function call */
    func1(1, rand() % 10);
    func2("Test");

    more_func_call();

    /* str only ever points at string literals here, so only the struct is released. */
    free(global);

    return NULL;
}

int main()
{
    pthread_t t = {0};
    int i = 0;
    
    for(i=0; i<5; i++)
    {
        pthread_create(&t, 
                       NULL, 
                       child_thread, 
                       malloc(sizeof(ThreadGlobal)));
    }

    sleep(3);
  
    return 0;
}

ThreadGlobal 为我们定义的包含多种基本数据类型的结构体

在 main 函数中创建子线程的时候，我们会 malloc 一个 ThreadGlobal 结构体，并将指向它的指针传入子线程，这样每个子线程都拥有一份专属的变量

但在后续子线程进行函数调用的时候，需要将指向 ThreadGlobal 的指针作为参数传入每个函数，以区分各线程专属的全局变量；这里我们使用宏定义隐藏了这个参数，简化了调用写法

程序运行结果如下图所示：

方案缺陷分析

NPTL 解决方案

test2.c

#define _GNU_SOURCE     /* To get pthread_getattr_np() declaration */
#define _XOPEN_SOURCE >= 500 || _POSIX_C_SOURCE >= 200809L
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <memory.h>
#include <semaphore.h>

/* Thread-specific data key: every thread stores and reads its own value under it. */
static pthread_key_t g_thread_global;

/*
 * Print the calling thread's value for g_thread_global, interpreting the
 * stored pointer as a small integer.
 */
void func_1()
{
    void* slot = pthread_getspecific(g_thread_global);
    int value = (long long)slot;

    printf("value = %d\n", value);
}

/*
 * Thread entry point that uses the key to hold a plain integer (255)
 * smuggled through the void* slot. arg is unused; always returns NULL.
 */
void* thread_1(void* arg)
{
    void* const int_as_ptr = (void*)255;

    pthread_setspecific(g_thread_global, int_as_ptr);

    /* Reads the value back through the key rather than through a parameter. */
    func_1();

    /*
     * Clear the slot before returning: main registers thread_delete, which
     * calls free(), as the key's destructor, and 255 is not a heap pointer.
     */
    pthread_setspecific(g_thread_global, NULL);

    return NULL;
}


/* Print the calling thread's value for g_thread_global as a C string. */
void func_2()
{
    printf("s = %s\n", (char*)pthread_getspecific(g_thread_global));
}

/*
 * Thread entry point that uses the key to hold a heap-allocated string.
 *
 * The buffer is handed to the key with pthread_setspecific; from then on it
 * is released by the key's destructor (thread_delete, registered in main)
 * when the thread exits. If the buffer cannot be allocated, or cannot be
 * stored under the key, the thread returns early without printing the string.
 *
 * arg is unused; always returns NULL.
 */
void* thread_2(void* arg)
{
    char* pc = malloc(16);

    if (pc == NULL)
    {
        return NULL;
    }

    // set thread local global
    strcpy(pc, "Delphi Tang");

    /* %p requires a void* argument. */
    printf("pc = %p\n", (void*)pc);

    if (pthread_setspecific(g_thread_global, pc) != 0)
    {
        /* The destructor will never see pc, so release it here. */
        free(pc);
        return NULL;
    }

    func_2(); // call to access local global

    return NULL;
}

/*
 * Destructor registered for g_thread_global in main.
 * Despite the parameter name, `key` receives the value that was stored
 * under the key; it is printed and then released with free().
 */
void thread_delete(void* key)
{
    void* const stored = key;

    printf("key = %p\n", stored);
    free(stored);
}

/*
 * Create the thread-specific data key with thread_delete as its destructor,
 * run thread_1 and thread_2 to completion, then delete the key.
 *
 * Every pthread call that can fail is checked: a thread is joined only if
 * it was actually created, and nothing is started if the key cannot be
 * created.
 *
 * Returns 0 when the key and both threads were created, 1 otherwise.
 */
int main()
{
    pthread_t t1 = {0};
    pthread_t t2 = {0};
    int t1_started = 0;
    int t2_started = 0;

    if (pthread_key_create(&g_thread_global, thread_delete) != 0)
    {
        fprintf(stderr, "pthread_key_create failed\n");
        return 1;
    }

    t1_started = (pthread_create(&t1, NULL, thread_1, NULL) == 0);
    t2_started = (pthread_create(&t2, NULL, thread_2, NULL) == 0);

    if (!t1_started)
    {
        fprintf(stderr, "pthread_create failed for thread_1\n");
    }
    else
    {
        pthread_join(t1, NULL);
    }

    if (!t2_started)
    {
        fprintf(stderr, "pthread_create failed for thread_2\n");
    }
    else
    {
        pthread_join(t2, NULL);
    }

    pthread_key_delete(g_thread_global);

    return (t1_started && t2_started) ? 0 : 1;
}

程序运行结果如下图所示：

NPTL 方案缺陷分析

NPTL 线程局部存储相关函数太难用，并且使用其编写的代码可读性差！

NPTL 方案的原理剖析

深度思考

NPTL 方案的编码细节是否可自动完成 (编译器自动完成)

传统 C 语言全局变量是否可拓展出多份拷贝 (每个线程一个拷贝)

如何区分传统的全局变量和拓展的全局变量？

什么时候使用拓展的全局变量？

GCC 解决方案

__thread int g_global = 0;

被 __thread 修饰的变量，每个线程都会有该变量的一份拷贝

如果使用了 static 或 extern 关键字，那么 __thread 位于其后

__thread 变量可在声明时进行初始化，也可使用 & 操作符获取地址值

GCC 解决方案示例

多线程局部存储技术

test3.c

#define _GNU_SOURCE     /* To get pthread_getattr_np() declaration */
#define _XOPEN_SOURCE >= 500 || _POSIX_C_SOURCE >= 200809L
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <memory.h>
#include <semaphore.h>

/* Declared with GCC's __thread, so every thread gets its own copy of this variable. */
static __thread long long g_global;

/* Print the calling thread's copy of g_global as an integer. */
void func_1()
{
    const long long v = g_global;

    printf("v = %lld\n", v);
}

/*
 * Thread entry point that stores the integer 255 in its own copy of
 * g_global, prints that copy's address and then its value via func_1.
 *
 * arg is unused; always returns NULL.
 */
void* thread_1(void* arg)
{
    g_global = 255;  // set thread local global

    /* %p requires a void* argument, so the long long* is cast explicitly. */
    printf("thread_1 : &g_global = %p\n", (void*)&g_global);

    func_1(); // call to access local global

    return NULL;
}


/* Print the calling thread's copy of g_global, interpreted as a char pointer. */
void func_2()
{
    char* s = (char*)g_global;

    printf("s = %s\n", s);
}

/*
 * Thread entry point that stores a heap-allocated string's address in its
 * own copy of g_global, prints that copy's address and then the string via
 * func_2.
 *
 * No destructor is registered anywhere for g_global, so the buffer is freed
 * here before the thread returns. If the allocation fails the thread
 * returns early without printing.
 *
 * arg is unused; always returns NULL.
 */
void* thread_2(void* arg)
{
    char* buf = malloc(16);

    if (buf == NULL)
    {
        return NULL;
    }

    g_global = (long long)buf;

    // set thread local global
    strcpy((char*)g_global, "Delphi Tang");

    /* %p requires a void* argument, so the long long* is cast explicitly. */
    printf("thread_2 : &g_global = %p\n", (void*)&g_global);

    func_2(); // call to access local global

    /* Release the buffer and drop the now-dangling address. */
    free(buf);
    g_global = 0;

    return NULL;
}

/*
void thread_delete(void* key)
{
    printf("key = %p\n", key);
    
    free(key);
}
*/

/*
 * Run thread_1 and thread_2 to completion.
 *
 * Unlike the pthread_key_t version, no key has to be created or deleted:
 * g_global is a __thread variable. A thread is joined only if
 * pthread_create reported success for it.
 *
 * Returns 0 when both threads were created, 1 otherwise.
 */
int main()
{
    pthread_t t1 = {0};
    pthread_t t2 = {0};
    int t1_started = 0;
    int t2_started = 0;

    t1_started = (pthread_create(&t1, NULL, thread_1, NULL) == 0);
    t2_started = (pthread_create(&t2, NULL, thread_2, NULL) == 0);

    if (!t1_started)
    {
        fprintf(stderr, "pthread_create failed for thread_1\n");
    }
    else
    {
        pthread_join(t1, NULL);
    }

    if (!t2_started)
    {
        fprintf(stderr, "pthread_create failed for thread_2\n");
    }
    else
    {
        pthread_join(t2, NULL);
    }

    return (t1_started && t2_started) ? 0 : 1;
}

我们通过 __thread 关键字来修饰全局变量 g_global，使得 g_global 变量在每个线程中都有一份拷贝

在 thread_1 线程中，我们将 g_global 赋值为一个整型；在 thread_2 线程中，我们将 g_global 赋值为一个指向字符串的指针；在两个线程中打印 g_global 的地址和值

程序运行结果如下图所示：