76 #include <sys/types.h>
81 #include <sys/socket.h>
84 #include <sys/resource.h>
86 #include <netinet/in.h>
87 #include <arpa/inet.h>
96 #include <semaphore.h>
99 #ifdef HAVE_LIBSYSTEMD
100 #include <systemd/sd-daemon.h>
103 #include <qb/qbdefs.h>
104 #include <qb/qblog.h>
105 #include <qb/qbloop.h>
106 #include <qb/qbutil.h>
107 #include <qb/qbipcs.h>
/*
 * File-scope tunables and state.
 *
 * IPC_LOGSYS_SIZE is 1024*64 when HAVE_SMALL_MEMORY_FOOTPRINT is defined.
 * The 8192*128 definition is presumably the #else branch; the #else/#endif
 * lines are not visible in this extract.
 * NOTE(review): both expansions are unparenthesized, so a use such as
 * `x / IPC_LOGSYS_SIZE` or `x % IPC_LOGSYS_SIZE` would not group as intended.
 */
130 #ifdef HAVE_SMALL_MEMORY_FOOTPRINT
131 #define IPC_LOGSYS_SIZE 1024*64
133 #define IPC_LOGSYS_SIZE 8192*128
/* No use of SERVER_BACKLOG is visible in this extract. */
148 #define SERVER_BACKLOG 5
/* Assigned from sched_get_priority_max(SCHED_RR) in corosync_set_rr_scheduler(). */
150 static int sched_priority = 0;
/* Upper bound of the per-service loop in confchg_fn(). */
152 static unsigned int service_count = 32;
/* Tested (== 1) at the start of confchg_fn(). */
158 static int sync_in_process = 1;
/* The libqb main loop: created, run and destroyed in main(). */
160 static qb_loop_t *corosync_poll_handle;
/* PID lock file: passed to corosync_flock() and unlinked at the end of main(). */
166 static const char *corosync_lock_file =
LOCALSTATEDIR"/run/corosync.pid";
/* Default configuration path; main() overwrites it from the -c option argument. */
168 static char corosync_config_file[PATH_MAX + 1] =
COROSYSCONFDIR "/corosync.conf";
172 return (corosync_poll_handle);
180 int (*dispatch_fn) (
int fd,
184 return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data,
190 return qb_loop_poll_del(handle, fd);
207 return (corosync_config_file);
/*
 * Dump the libqb blackbox to a per-invocation file and create a symlink
 * named "<state dir>/fdata" that points at it.
 *
 * The dump file name is built from the format "%s/fdata-%s-%lld"; the last
 * argument is the process id.  The first two arguments sit on a line that is
 * not visible in this extract (presumably get_state_dir() and time_str).
 *
 * Takes no arguments and returns nothing; failure handling for each step is
 * on lines not visible here.
 */
210 static void corosync_blackbox_write_to_file (
void)
212 char fname[PATH_MAX];
213 char fdata_fname[PATH_MAX];
214 char time_str[PATH_MAX];
215 struct tm cur_time_tm;
/* Current wall-clock time, broken down in the local time zone. */
219 cur_time_t = time(NULL);
220 localtime_r(&cur_time_t, &cur_time_tm);
/* Timestamp with one-second resolution, e.g. 2018-01-31T12:34:56. */
222 strftime(time_str, PATH_MAX,
"%Y-%m-%dT%H:%M:%S", &cur_time_tm);
/* A result >= PATH_MAX means the file name did not fit into fname. */
223 if (snprintf(fname, PATH_MAX,
"%s/fdata-%s-%lld",
226 (
long long int)getpid()) >= PATH_MAX) {
/* A negative result from the blackbox writer is treated as an error. */
231 if ((res = qb_log_blackbox_write_to_file(fname)) < 0) {
/* NOTE(review): the return value of this snprintf is not checked for truncation in the visible lines. */
235 snprintf(fdata_fname,
sizeof(fdata_fname),
"%s/fdata",
get_state_dir());
/*
 * NOTE(review): symlink() fails with EEXIST when fdata_fname already
 * exists; no preceding unlink is visible in this extract -- confirm.
 */
237 if (symlink(fname, fdata_fname) == -1) {
/*
 * Stops the main loop (corosync_poll_handle), which makes qb_loop_run() in
 * main() return.  Any statements preceding the stop call are not visible in
 * this extract.
 */
243 static void unlink_all_completed (
void)
246 qb_loop_stop (corosync_poll_handle);
255 static int32_t sig_diag_handler (
int num,
void *data)
261 static int32_t sig_exit_handler (
int num,
void *data)
/*
 * Fatal-signal handler; main() installs it for both SIGSEGV and SIGABRT.
 *
 * num: the signal number being delivered.
 *
 * NOTE(review): corosync_blackbox_write_to_file() calls localtime_r(),
 * strftime() and snprintf(), none of which POSIX lists as async-signal-safe.
 * This looks like a deliberate best-effort dump on a crash path -- confirm.
 */
268 static void sigsegv_handler (
int num)
/* Restore the default action first so a second fault inside the dump is not handled here again. */
270 (void)signal (num, SIG_DFL);
271 corosync_blackbox_write_to_file ();
276 #define LOCALHOST_IP inet_addr("127.0.0.1")
278 static void *corosync_group_handle;
285 static void serialize_lock (
void)
289 static void serialize_unlock (
void)
/*
 * Called when service synchronization has finished (main_service_ready()
 * passes it next to corosync_sync_callbacks_retrieve).  Logs the
 * "ready to provide service" message and, when built with HAVE_LIBSYSTEMD,
 * reports readiness to systemd.  Other statements are not visible in this
 * extract.
 */
293 static void corosync_sync_completed (
void)
296 "Completed service synchronization, ready to provide service.");
306 #ifdef HAVE_LIBSYSTEMD
/* First argument 0: the notification environment is left set for later sd_notify() calls. */
307 sd_notify (0,
"READY=1");
311 static int corosync_sync_callbacks_retrieve (
319 if (callbacks == NULL) {
/*
 * Per-node bookkeeping for a node that joined the membership.
 *
 * nodeid: id of the joining node, formatted with %u into the key names
 *         runtime.members.<nodeid>.ip, .join_count and .status.
 *
 * The calls that consume these keys and the values stored under them are on
 * lines not visible in this extract (presumably icmap operations -- confirm).
 */
334 static void member_object_joined (
unsigned int nodeid)
341 "runtime.members.%u.ip",
nodeid);
343 "runtime.members.%u.join_count",
nodeid);
345 "runtime.members.%u.status",
nodeid);
/*
 * Per-node bookkeeping for a node that left the membership.
 *
 * nodeid: id of the departed node, formatted into the key name
 *         runtime.members.<nodeid>.status.  The value written under that key
 *         is not visible in this extract.
 */
360 static void member_object_left (
unsigned int nodeid)
365 "runtime.members.%u.status",
nodeid);
/*
 * Membership configuration-change callback.
 *
 * member_list / member_list_entries: node ids of the current membership.
 * left_list   / left_list_entries:   node ids that left.
 * joined_list / joined_list_entries: node ids that joined.
 * ring_id (declared on a line not visible here) is forwarded unchanged.
 *
 * The leading parameter(s) and several body lines are not visible in this
 * extract, so the actions taken inside the `if` bodies are not documented.
 */
372 static void confchg_fn (
374 const unsigned int *member_list,
size_t member_list_entries,
375 const unsigned int *left_list,
size_t left_list_entries,
376 const unsigned int *joined_list,
size_t joined_list_entries,
380 int abort_activate = 0;
382 if (sync_in_process == 1) {
/* Update per-node state: departures are processed before arrivals. */
389 for (i = 0; i < left_list_entries; i++) {
390 member_object_left (left_list[i]);
392 for (i = 0; i < joined_list_entries; i++) {
393 member_object_joined (joined_list[i]);
/* Visit every service slot; the callee receiving these arguments is on a line not visible here. */
398 for (i = 0; i < service_count; i++) {
401 member_list, member_list_entries,
402 left_list, left_list_entries,
403 joined_list, joined_list_entries,
ring_id);
407 if (abort_activate) {
418 static void priv_drop (
void)
/*
 * Detach the process from its terminal.  The visible part redirects
 * stdin, stdout and stderr to /dev/null; earlier steps (original lines
 * 424-450) are not visible in this extract.
 */
423 static void corosync_tty_detach (
void)
451 devnull = open(
"/dev/null", O_RDWR);
/* Descriptors 0, 1 and 2 all become /dev/null; any dup2() failure enters the error branch. */
456 if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0
457 || dup2(devnull, 2) < 0) {
/*
 * Lock all current and future pages of the process into RAM so that the
 * service does not take page faults (see the log message below).
 *
 * An rlimit with soft and hard limit RLIM_INFINITY is prepared first; the
 * setrlimit() call that presumably applies it to RLIMIT_MEMLOCK is on a line
 * not visible in this extract.
 */
464 static void corosync_mlockall (
void)
467 struct rlimit rlimit;
469 rlimit.rlim_cur = RLIM_INFINITY;
470 rlimit.rlim_max = RLIM_INFINITY;
/* Platforms without RLIMIT_MEMLOCK fall back to RLIMIT_VMEM. */
472 #ifndef RLIMIT_MEMLOCK
473 #define RLIMIT_MEMLOCK RLIMIT_VMEM
478 res = mlockall (MCL_CURRENT | MCL_FUTURE);
481 "Could not lock memory of service to avoid page faults");
/*
 * Timer callback that refreshes totem statistics.
 *
 * data: opaque timer argument; no use of it is visible in this extract.
 *
 * Visible behaviour: picks one of two reason strings (sendmsg failures above
 * threshold / continuously in gather state) and logs the "unable to form a
 * cluster" message with it, resets three running totals to zero, and finally
 * passes itself together with &corosync_stats_timer_handle to a call whose
 * head is not visible (presumably re-arming its own timer -- confirm).
 * The conditions selecting each reason string are not visible.
 */
486 static void corosync_totem_stats_updater (
void *data)
489 uint32_t total_mtt_rx_token;
490 uint32_t total_backlog_calc;
491 uint32_t total_token_holdtime;
506 cstr =
"number of multicast sendmsg failures is above threshold";
510 cstr =
"totem is continuously in gather state";
514 "Totem is unable to form a cluster because of an "
515 "operating system or network fault (reason: %s). The most common "
516 "cause of this message is that the local firewall is "
517 "configured improperly.", cstr);
/* Accumulators start from zero on every invocation. */
523 total_mtt_rx_token = 0;
524 total_token_holdtime = 0;
525 total_backlog_calc = 0;
557 corosync_totem_stats_updater,
558 &corosync_stats_timer_handle);
/*
 * Starts statistics collection: corosync_totem_stats_updater and
 * &corosync_stats_timer_handle are handed to a call whose head is not
 * visible in this extract (presumably a libqb timer registration -- confirm).
 * Called from main_service_ready().
 */
561 static void corosync_totem_stats_init (
void)
565 corosync_totem_stats_updater,
566 &corosync_stats_timer_handle);
/*
 * Message delivery callback.
 *
 * msg_len: length of the delivered message.
 * endian_conversion_required: non-zero when the message needs byte-order
 *         conversion before it is handled.
 * The leading parameters (original lines 570-571) are not visible in this
 * extract.
 *
 * The message is viewed through a qb_ipc_request_header.  When conversion is
 * required, the target service must provide an exec_endian_convert_fn for
 * the addressed function id; this is enforced with assert().
 */
569 static void deliver_fn (
572 unsigned int msg_len,
573 int endian_conversion_required)
575 const struct qb_ipc_request_header *
header;
581 if (endian_conversion_required) {
604 if (endian_conversion_required) {
/* A missing converter is a programming error, not a runtime condition. */
605 assert(
corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
615 const struct iovec *iovec,
616 unsigned int iov_len,
619 const struct qb_ipc_request_header *req = iovec->iov_base;
623 service = req->id >> 16;
624 fn_id = req->id & 0xffff;
/*
 * Load the persisted ring id from "<dir>/ringid_<id>", or create the file
 * when it cannot be opened or does not hold exactly sizeof(uint64_t) bytes.
 * The parameter list and the snprintf arguments that fill in <dir> and <id>
 * are on lines not visible in this extract.
 */
633 static void corosync_ring_id_create_or_load (
639 char filename[PATH_MAX];
641 snprintf (filename,
sizeof(filename),
"%s/ringid_%u",
/* The 0700 mode argument has no effect here because O_CREAT is not set. */
643 fd = open (filename, O_RDONLY, 0700);
/* Open failure or a short read both lead to (re)creating the file. */
654 if ((fd == -1) || (res !=
sizeof (uint64_t))) {
/* NOTE(review): mode 0700 sets the execute bit on a data file. */
657 fd = open (filename, O_CREAT|O_RDWR, 0700);
663 "Couldn't write ringid file '%s'", filename);
669 "Couldn't create ringid file '%s'", filename);
/*
 * Persist the ring id to "<dir>/ringid_<id>".  The parameter list, the
 * snprintf arguments and the write itself are on lines not visible in this
 * extract.
 */
678 static void corosync_ring_id_store (
682 char filename[PATH_MAX];
686 snprintf (filename,
sizeof(filename),
"%s/ringid_%u",
/* The 0700 mode argument has no effect without O_CREAT. */
689 fd = open (filename, O_WRONLY, 0700);
/* Presumably a fallback taken when the open above failed; the guarding condition is not visible. */
691 fd = open (filename, O_CREAT|O_RDWR, 0700);
713 static qb_loop_timer_handle recheck_the_q_level_timer;
729 unsigned int service,
732 void *sending_allowed_private_data)
736 struct iovec reserve_iovec;
737 struct qb_ipc_request_header *
header = (
struct qb_ipc_request_header *)msg;
740 reserve_iovec.iov_base = (
char *)
header;
741 reserve_iovec.iov_len =
header->size;
744 corosync_group_handle,
755 sending_allowed = QB_FALSE;
761 sending_allowed = QB_TRUE;
763 sending_allowed = QB_TRUE;
770 return -EHOSTUNREACH;
773 return (sending_allowed);
791 assert (source != NULL);
802 assert ((source != NULL) && (conn != NULL));
/*
 * Periodic timer callback that detects when this process was not scheduled
 * for longer than expected.
 *
 * data: opaque timer argument; timeout_data is presumably derived from it on
 *       a line not visible in this extract.
 *
 * Each run measures the time since the previous run (tv_prev).  The visible
 * log text reports the measured gap against the threshold max_tv_diff, both
 * in milliseconds; the comparison that triggers the message is not visible.
 * The function then registers itself again with qb_loop_timer_add().
 */
815 static void timer_function_scheduler_timeout (
void *data)
818 unsigned long long tv_current;
819 unsigned long long tv_diff;
820 uint64_t schedmiss_event_tstamp;
822 tv_current = qb_util_nano_current_get ();
/* tv_prev == 0 marks the very first run: there is no earlier sample to diff against. */
824 if (timeout_data->
tv_prev == 0) {
828 timeout_data->
tv_prev = tv_current;
/* Elapsed time since the previous run, in nanoseconds. */
832 tv_diff = tv_current - timeout_data->
tv_prev;
833 timeout_data->
tv_prev = tv_current;
/* Wall-clock time of the event, converted from nanoseconds to milliseconds. */
836 schedmiss_event_tstamp = qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC;
839 "(threshold is %0.4f ms). Consider token timeout increase.",
840 schedmiss_event_tstamp,
841 (
float)tv_diff / QB_TIME_NS_IN_MSEC, (
float)timeout_data->
max_tv_diff / QB_TIME_NS_IN_MSEC);
/* Re-arm: the timer's callback is this function. */
850 qb_loop_timer_add (corosync_poll_handle,
854 timer_function_scheduler_timeout,
/*
 * Switch the process to the SCHED_RR real-time policy at the maximum
 * priority and give the libqb log thread a matching priority.
 *
 * Returns an int; main() treats a non-zero result as failure.  The exact
 * return values are on lines not visible in this extract.
 *
 * Side effect: stores the maximum SCHED_RR priority in the file-scope
 * variable sched_priority.
 */
859 static int corosync_set_rr_scheduler (
void)
/* The real-time path is compiled only when all three scheduling APIs are available. */
863 #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
866 sched_priority = sched_get_priority_max (SCHED_RR);
/* -1 is sched_get_priority_max()'s error return. */
867 if (sched_priority != -1) {
872 "Could not set SCHED_RR at priority %d",
/* Presumably the failure path: the log thread is put on SCHED_OTHER, priority 0. */
876 #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
877 qb_log_thread_priority_set (SCHED_OTHER, 0);
/* Presumably the success path: the log thread gets the same policy and priority as the process. */
885 #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
886 res = qb_log_thread_priority_set (SCHED_RR, sched_priority);
892 "Could not set logsys thread priority."
893 " Can't continue because of priority inversions.");
899 "Could not get maximum scheduler priority");
905 "The Platform is missing process priority setting features. Leaving at default.");
/*
 * Return a pointer into file_name identifying its final path component.
 *
 * file_name: NUL-terminated path; it is searched for its last '/'.
 *
 * How the strrchr() result is turned into the return value (including the
 * no-slash case) is on lines not visible in this extract.
 */
915 static const char *corosync_basename(
const char *file_name)
918 base = strrchr (file_name,
'/');
/*
 * printf-style logging entry point that forwards to libqb.
 *
 * Two parameter lists are visible (original lines 927-929 and 935-939).
 * They are presumably a prototype followed by the definition, or two
 * preprocessor alternatives; the surrounding lines are not visible in this
 * extract.
 *
 * level, subsys: log priority and subsystem id.
 * function_name, file_name: call-site identification; only the base name of
 *         file_name is forwarded.
 * format, ...: printf-style format string and its arguments.
 */
927 _logsys_log_printf(
int level,
int subsys,
928 const char *function_name,
929 const char *file_name,
935 _logsys_log_printf(
int level,
int subsys,
936 const
char *function_name,
937 const
char *file_name,
939 const
char *format, ...)
943 va_start(ap, format);
/* The directory part of the path is stripped before it reaches the logger. */
944 qb_log_from_external_source_va(function_name, corosync_basename(file_name),
945 format, level, file_line,
/*
 * Key-change notification callback for the runtime.blackbox.* keys.
 *
 * key_name: name of the key that changed.  The other parameters are on
 *           lines not visible in this extract.
 *
 * runtime.blackbox.dump_flight_data: writes the blackbox to a file.
 * runtime.blackbox.dump_state: the action taken after the stderr message is
 *           not visible here.
 *
 * NOTE(review): both branches print a debug marker to stderr, and
 * "statefump" looks like a typo for "statedump".  The strings are runtime
 * output, so they are left unchanged here.
 */
950 static void fplay_key_change_notify_fn (
952 const char *key_name,
957 if (strcmp(key_name,
"runtime.blackbox.dump_flight_data") == 0) {
958 fprintf(stderr,
"Writetofile\n");
959 corosync_blackbox_write_to_file ();
961 if (strcmp(key_name,
"runtime.blackbox.dump_state") == 0) {
962 fprintf(stderr,
"statefump\n");
/*
 * Installs fplay_key_change_notify_fn as a callback in two separate calls
 * whose heads are not visible in this extract.  Presumably there is one
 * registration per runtime.blackbox.* key that the callback tests -- confirm.
 * Called from main_service_ready().
 */
967 static void corosync_fplay_control_init (
void)
976 fplay_key_change_notify_fn,
980 fplay_key_change_notify_fn,
/*
 * Key-change notification callback that reacts to the key
 * "runtime.force_gather".
 *
 * key_name: name of the key that changed.  The other parameters and the
 *           action taken on a match are on lines not visible in this extract.
 */
984 static void force_gather_notify_fn(
986 const char *key_name,
998 if (strcmp(key_name,
"runtime.force_gather") == 0) {
/*
 * Installs force_gather_notify_fn as a callback; the registering call and
 * its other arguments are on lines not visible in this extract.
 * Called from main_service_ready().
 */
1007 static void corosync_force_gather_init (
void)
1015 force_gather_notify_fn,
1026 static void set_icmap_ro_keys_flag (
void)
/*
 * Callback that main() passes as the last visible argument of one of its
 * initialization calls.  It starts the statistics updater, the blackbox
 * control keys and the force-gather key handling.  It then hands
 * corosync_sync_callbacks_retrieve and corosync_sync_completed to a call
 * whose head is not visible in this extract (presumably the synchronization
 * engine's init -- confirm).
 */
1061 static void main_service_ready (
void)
1074 corosync_totem_stats_init ();
1075 corosync_fplay_control_init ();
1076 corosync_force_gather_init ();
1079 corosync_sync_callbacks_retrieve,
1080 corosync_sync_completed);
/*
 * Take the single-instance lock and record the daemon's pid in it.
 *
 * lockfile: path of the lock/pid file; created with mode 0640 if missing.
 * pid:      process id written to the file as decimal text plus newline.
 *
 * Returns an enum e_corosync_done; main() compares the result against
 * COROSYNC_DONE_EXIT.  The individual return statements are on lines not
 * visible in this extract.
 *
 * Every failure after the lock is taken jumps to error_close_unlink (label
 * not visible here).
 */
1083 static enum e_corosync_done corosync_flock (
const char *lockfile, pid_t pid)
1093 lf = open (lockfile, O_WRONLY | O_CREAT, 0640);
/* Write lock requested with F_SETLK, which fails immediately instead of waiting if another process holds it. */
1100 lock.l_type = F_WRLCK;
1102 lock.l_whence = SEEK_SET;
1104 if (fcntl (lf, F_SETLK, &
lock) == -1) {
/* Discard any stale pid left by a previous instance. */
1124 if (ftruncate (lf, 0) == -1) {
1128 goto error_close_unlink;
1131 memset (pid_s, 0,
sizeof (pid_s));
/* NOTE(review): pid_t is a signed integer type but is formatted with "%u". */
1132 snprintf (pid_s,
sizeof (pid_s) - 1,
"%u\n", pid);
/* NOTE(review): write() returns ssize_t, which is compared here with the size_t result of strlen(). */
1135 if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) {
/* An interrupted write is distinguished from a real error; the retry logic is not visible here. */
1136 if (errno == EINTR) {
1140 "Error was %s", strerror (errno));
1142 goto error_close_unlink;
/* Set close-on-exec so the lock descriptor is not inherited across exec(). */
1146 if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) {
1148 "Error was %s", strerror (errno));
1150 goto error_close_unlink;
1152 fd_flag |= FD_CLOEXEC;
1153 if (fcntl (lf, F_SETFD, fd_flag) == -1) {
1155 "Error was %s", strerror (errno));
1157 goto error_close_unlink;
/*
 * Move this process into the root cpu cgroup by writing its pid to
 * /sys/fs/cgroup/cpu/tasks.
 *
 * Returns an int status; the only visible call site in main() discards it
 * with a (void) cast.  The concrete values are on lines not visible in this
 * extract.
 */
1170 static int corosync_move_to_root_cgroup(
void) {
/*
 * Probe for cpu.rt_runtime_us first; the log text below names the cases in
 * which the file is absent.
 * NOTE(review): the 't' in mode "rt" is not an ISO C fopen() mode character.
 */
1181 f = fopen(
"/sys/fs/cgroup/cpu/cpu.rt_runtime_us",
"rt");
1184 "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
1191 f = fopen(
"/sys/fs/cgroup/cpu/tasks",
"w");
/* fprintf() returning <= 0 means nothing was written. */
1198 if (fprintf(f,
"%jd\n", (intmax_t)getpid()) <= 0) {
1201 goto close_and_exit_res;
/* For a stream opened for writing, fclose() is where a buffered write error surfaces. */
1205 if (fclose(f) != 0) {
/*
 * Daemon entry point.
 *
 * Only part of the function is visible in this extract.  The visible steps,
 * in order: parse the command line (-c, -f, -t, -v), install fatal-signal
 * handlers, initialize the configuration and statistics components, set up
 * logging, validate the state directory, optionally move to the root
 * cgroup, set scheduler and priority, detach from the terminal, lock
 * memory, create the libqb loop and register signal handlers, take the pid
 * lock, run the loop, then tear down.
 *
 * Returns EXIT_SUCCESS or EXIT_FAILURE on the visible return paths.
 */
1216 int main (
int argc,
char **argv,
char **envp)
1218 const char *error_string;
1221 int background, sched_rr, prio, testonly, move_to_root_cgroup;
1222 struct stat stat_out;
1224 uint64_t totem_config_warnings;
1229 int log_subsys_id_totem;
/*
 * NOTE(review): POSIX getopt() signals the end of options with -1; the
 * comparison with EOF relies on EOF being -1.
 */
1236 while ((ch = getopt (argc, argv,
"c:ftv")) != EOF) {
/* -c: copy the path; a result >= the buffer size means it was truncated. */
1240 res = snprintf(corosync_config_file,
sizeof(corosync_config_file),
"%s", optarg);
1241 if (res >=
sizeof(corosync_config_file)) {
1242 fprintf (stderr,
"Config file path too long.\n");
1246 return EXIT_FAILURE;
/* -v: print the version banner and exit successfully. */
1256 printf (
"Corosync Cluster Engine, version '%s'\n",
VERSION);
1257 printf (
"Copyright (c) 2006-2018 Red Hat, Inc.\n");
1259 return EXIT_SUCCESS;
/* Usage text; this path exits with failure. */
1265 " -c : Corosync config file path.\n"\
1266 " -f : Start application in foreground.\n"\
1267 " -t : Test configuration and exit.\n"\
1268 " -v : Display version and SVN revision of Corosync and exit.\n");
1270 return EXIT_FAILURE;
/* SIGSEGV and SIGABRT share one handler, which dumps the blackbox. */
1278 (void)signal (SIGSEGV, sigsegv_handler);
1279 (void)signal (SIGABRT, sigsegv_handler);
/* Compiled in only when MSG_NOSIGNAL is non-zero. */
1280 #if MSG_NOSIGNAL != 0
1281 (void)signal (SIGPIPE, SIG_IGN);
/* Early failures are reported on both stderr and syslog. */
1285 fprintf (stderr,
"Corosync Executive couldn't initialize configuration component.\n");
1286 syslog (
LOGSYS_LEVEL_ERROR,
"Corosync Executive couldn't initialize configuration component.");
1289 set_icmap_ro_keys_flag();
1302 fprintf (stderr,
"%s\n", error_string);
1308 fprintf (stderr,
"Corosync Executive couldn't initialize statistics component.\n");
1309 syslog (
LOGSYS_LEVEL_ERROR,
"Corosync Executive couldn't initialize statistics component.");
1323 fprintf(stderr,
"%s", error_string);
1337 "totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c,"
1338 "totempg.c,totemudp.c,totemudpu.c,totemnet.c,totemknet.c");
/* The state directory must exist and be a directory (res is presumably the stat() result). */
1344 if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) {
1352 "Please make sure it has correct context and rights.",
get_state_dir());
1376 "Nodelist one is going to be used.");
1379 if (totem_config_warnings != 0) {
/* The root-cgroup move defaults to on; any configured value other than "yes" turns it off. */
1400 move_to_root_cgroup = 1;
1402 if (strcmp(tmp_str,
"yes") != 0) {
1403 move_to_root_cgroup = 0;
/* Best effort: the result is deliberately discarded. */
1412 if (move_to_root_cgroup) {
1413 (void)corosync_move_to_root_cgroup();
1418 if (strcmp(tmp_str,
"yes") != 0) {
/* The priority setting accepts "max", "min" or a decimal integer. */
1426 if (strcmp(tmp_str,
"max") == 0) {
1428 }
else if (strcmp(tmp_str,
"min") == 0) {
/*
 * NOTE(review): `errno != 0` is meaningful only if errno was set to 0
 * just before strtol(); that reset is not visible in this extract.
 */
1433 tmpli = strtol(tmp_str, &ep, 10);
1434 if (errno != 0 || *ep !=
'\0' || tmpli > INT_MAX || tmpli < INT_MIN) {
1449 if (corosync_set_rr_scheduler () != 0) {
/* who == 0 with PRIO_PGRP addresses the caller's own process group. */
1457 if (setpriority(PRIO_PGRP, 0, prio) != 0) {
1459 "Could not set priority %d", prio);
1485 corosync_tty_detach ();
1496 corosync_mlockall ();
1498 corosync_poll_handle = qb_loop_create ();
/* SIGUSR2 goes to the diagnostics handler at low priority; SIGINT, SIGQUIT and SIGTERM go to the exit handler at high priority. */
1504 qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
1505 SIGUSR2, NULL, sig_diag_handler, NULL);
1506 qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
1507 SIGINT, NULL, sig_exit_handler, NULL);
1508 qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
1509 SIGQUIT, NULL, sig_exit_handler, NULL);
1510 qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
1511 SIGTERM, NULL, sig_exit_handler, NULL);
/* Any result other than COROSYNC_DONE_EXIT from the pid lock enters this branch (its body is not visible). */
1518 if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) !=
COROSYNC_DONE_EXIT) {
1533 corosync_poll_handle,
1541 main_service_ready);
1544 &corosync_group_handle,
1549 corosync_group_handle,
/* Blocks until the loop is stopped; unlink_all_completed() calls qb_loop_stop(). */
1570 qb_loop_run (corosync_poll_handle);
1580 qb_loop_destroy (corosync_poll_handle);
/* Remove the pid file on orderly shutdown. */
1589 unlink (corosync_lock_file);
1593 return EXIT_SUCCESS;