OpenMP在gcc里是如何启动的?
- 编译原理
- 2025-05-23
- 28热度
- 0评论
-
- 图中未显示锁竞争(Lock Contention = 0s),表明 PyTorch 或 MKL 可能绕过 GOMP 的全局任务锁,改用原子操作或分区任务。
/* This structure is used to communicate across pthread_create.  The
   creating thread fills it in; the new thread copies the fields it needs
   into its own gomp_thread in gomp_thread_start.  */
struct gomp_thread_start_data
{
/* Outlined parallel-region body and its argument, run by the new thread.  */
void (*fn) (void *);
void *fn_data;
/* Initial team state for the new thread (copied into thr->ts).  */
struct gomp_team_state ts;
/* Implicit task the new thread starts with.  */
struct gomp_task *task;
/* Pool the thread belongs to; used for re-docking between teams.  */
struct gomp_thread_pool *thread_pool;
/* NOTE(review): presumably the place-partition index for thread
   affinity — confirm against the place-handling code.  */
unsigned int place;
unsigned int num_teams;
unsigned int team_num;
/* True for threads of a nested parallel region: they run one team and
   exit instead of returning to the pool (see gomp_thread_start).  */
bool nested;
pthread_t handle;
};
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.

   XDATA points to a gomp_thread_start_data.  NOTE(review): XDATA is
   presumably owned by the creating thread and may become invalid once the
   barriers below are passed — all needed fields are copied out first.  */
static void *
gomp_thread_start (void *xdata)
{
struct gomp_thread_start_data *data = xdata;
struct gomp_thread *thr;
struct gomp_thread_pool *pool;
void (*local_fn) (void *);
void *local_data;
/* Locate per-thread state: in TLS when available, otherwise on this
   thread's stack, published via pthread_setspecific below.  */
#if defined HAVE_TLS || defined USE_EMUTLS
thr = &gomp_tls_data;
#else
struct gomp_thread local_thr;
thr = &local_thr;
#endif
/* Semaphore other threads use to release (wake) this thread.  */
gomp_sem_init (&thr->release, 0);
/* Extract what we need from data. */
local_fn = data->fn;
local_data = data->fn_data;
thr->thread_pool = data->thread_pool;
thr->ts = data->ts;
thr->task = data->task;
thr->place = data->place;
thr->num_teams = data->num_teams;
thr->team_num = data->team_num;
#ifdef GOMP_NEEDS_THREAD_HANDLE
thr->handle = data->handle;
#endif
#if !(defined HAVE_TLS || defined USE_EMUTLS)
pthread_setspecific (gomp_tls_key, thr);
#endif
/* Publish this thread's release semaphore in the team's table, indexed
   by team id, so other team members can find and wake it.  */
thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
/* Make thread pool local. */
pool = thr->thread_pool;
if (data->nested)
{
/* Nested region: run exactly one team, then exit.  Rendezvous with
   the team before running, and again (twice) when tearing down so
   the task is not freed while the team is still using it.  */
struct gomp_team *team = thr->ts.team;
struct gomp_task *task = thr->task;
gomp_barrier_wait (&team->barrier);
local_fn (local_data);
gomp_team_barrier_wait_final (&team->barrier);
gomp_finish_task (task);
gomp_barrier_wait_last (&team->barrier);
}
else
{
/* Top-level region: register in the pool and loop, running one team
   per iteration until no further work is assigned.  */
pool->threads[thr->ts.team_id] = thr;
gomp_simple_barrier_wait (&pool->threads_dock);
do
{
struct gomp_team *team = thr->ts.team;
struct gomp_task *task = thr->task;
local_fn (local_data);
gomp_team_barrier_wait_final (&team->barrier);
gomp_finish_task (task);
/* Dock at the pool barrier; when it releases, the pool master has
   (possibly) stored a new assignment in thr->fn / thr->data.  */
gomp_simple_barrier_wait (&pool->threads_dock);
local_fn = thr->fn;
local_data = thr->data;
/* Clear the slot so a stale fn is never re-run.  */
thr->fn = NULL;
}
while (local_fn);
}
/* No work left (or nested team done): tear down and let the thread
   reap itself via pthread_detach.  */
gomp_sem_destroy (&thr->release);
pthread_detach (pthread_self ());
thr->thread_pool = NULL;
thr->task = NULL;
return NULL;
}
#endif
- 适用场景:线程属于顶级并行区域,通常与线程池关联。
-
流程:
- 注册线程:将当前线程存储到线程池的 threads 数组中,索引为 team_id。
- 初始屏障:调用 gomp_simple_barrier_wait,等待线程池中的所有线程到达 threads_dock 屏障。
- 循环执行任务:
- 执行 local_fn(local_data),运行分配的并行任务。
- 调用 gomp_team_barrier_wait_final,等待团队完成任务。
- 调用 gomp_finish_task,清理任务状态。
- 再次调用 gomp_simple_barrier_wait,等待线程池分配新任务。
- 从 thr->fn 和 thr->data 更新 local_fn 和 local_data(线程池可能已为其分配新任务)。
- 如果 local_fn 非空,循环继续;否则退出。
- 线程池重用:非嵌套线程在完成任务后不立即退出,而是等待线程池分配新任务,体现了 OpenMP 的线程池优化。
/* A "simple" barrier is a thin wrapper around the generic gomp_barrier;
   waiting on it simply waits on the embedded barrier.  */
static inline void
gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
{
  gomp_barrier_t *inner = &bar->bar;

  gomp_barrier_wait (inner);
}
/* Full barrier wait: record this thread's arrival, then complete the
   wait using the state token returned by the arrival phase.  */
void
gomp_barrier_wait (gomp_barrier_t *barrier)
{
  gomp_barrier_state_t state = gomp_barrier_wait_start (barrier);

  gomp_barrier_wait_end (barrier, state);
}
/* Second phase of a barrier wait.  bar->mutex1 is held on entry (both
   paths below release it; presumably it was acquired by
   gomp_barrier_wait_start, which is not visible here).

   Centralized two-semaphore barrier:
   - The last arriver (BAR_WAS_LAST set in STATE) posts sem1 once per
     waiting thread to wake them all, then blocks on sem2 until the final
     waiter confirms it has woken.
   - Every other thread drops mutex1 and sleeps on sem1; the last of them
     to decrement ARRIVED posts sem2, releasing the last arriver.  */
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
unsigned int n;
if (state & BAR_WAS_LAST)
{
/* Exclude ourselves; N is now the number of threads still asleep.  */
n = --bar->arrived;
if (n > 0)
{
/* Wake each of the N sleeping threads.  */
do
gomp_sem_post (&bar->sem1);
while (--n != 0);
/* Wait for the last waker-up to acknowledge via sem2.  */
gomp_sem_wait (&bar->sem2);
}
gomp_mutex_unlock (&bar->mutex1);
}
else
{
gomp_mutex_unlock (&bar->mutex1);
/* Sleep until the last arriver posts sem1 for us.  */
gomp_sem_wait (&bar->sem1);
/* Count down the waiters — atomically when builtins are available,
   otherwise under the secondary mutex.  */
#ifdef HAVE_SYNC_BUILTINS
n = __sync_add_and_fetch (&bar->arrived, -1);
#else
gomp_mutex_lock (&bar->mutex2);
n = --bar->arrived;
gomp_mutex_unlock (&bar->mutex2);
#endif
/* Last waiter out: release the BAR_WAS_LAST thread.  */
if (n == 0)
gomp_sem_post (&bar->sem2);
}
}