/*
 * salinfo_decode.c - program to decode SAL error records
 *
 * Copyright (c) 2003 Hewlett-Packard Co
 *	Bjorn Helgaas <bjorn.helgaas@hp.com>
 * Copyright (c) 2004, 2005 Silicon Graphics, Inc
 *	Keith Owens <kaos@sgi.com>
 * 2003-11-05 Add options.
 *	      Handle SAL records as well as standalone raw records.
 *	      Keith Owens <kaos@sgi.com>
 * 2003-11-16 Break out oem data decoder so each platform can handle the
 *	      oem data as it likes.
 *	      Keith Owens <kaos@sgi.com>
 * 2004-10-04 Handle kernels that clear the bit themselves when there is no data.
 *	      Keith Owens <kaos@sgi.com>
 * 2005-12-14 Add options -i, -s, -l, -T.
 *	      Count and log the number of dropped records.
 *	      Keith Owens <kaos@sgi.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <sys/types.h>
#include <alloca.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <syslog.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/time.h>
#include <sys/wait.h>

#include "mca.h"

extern int debug;

static char *prefix;
static char *type;			/* from -t */
static char *directory;			/* from -D */
static char *trigger_filename;		/* from -T */
static char *trigger_options;
static int len_trigger_options;

#define LOG_DROPPED 30*60			/* log dropped records every 30 minutes */
static unsigned alarm_left = LOG_DROPPED;
static volatile unsigned alarm_popped;
static volatile unsigned more_to_do = 1;
static time_t first_dropped;

struct threshold {
	const char *name;
	unsigned max;
	unsigned value;
	unsigned dropped;
	unsigned set;
};

static struct threshold trigger = { "-T filename" };
static struct threshold ipct = { "-i pct", 100 };
static struct threshold spct = { "-s pct", 100 };
static struct threshold limit = { "-l limit", ~0 };
static struct threshold *dropped_all[] = { &ipct, &spct, &limit };

static unsigned errors;
static struct timeval boottime;

static sal_log_record_header_t *
salinfo_buffer(int fd, int *bufsize)
{
	int nbytes, size, alloc;
	sal_log_record_header_t *buffer;

	lseek(fd, 0, 0);
	buffer = NULL;
	alloc = 16 * 1024;	// total buffer size
	size = 0;		// amount of buffer used so far
	do {
		buffer = realloc(buffer, alloc);
		if (!buffer) {
			fprintf(stderr, "%s: Can't alloc %d bytes\n", __FUNCTION__, alloc);
			exit(1);
		}

		nbytes = read(fd, buffer + size, alloc - size);
		if (nbytes < 0) {
			perror("salinfo_buffer read");
			exit(1);
		}

		if (nbytes == alloc - size)
			alloc *= 2;

		size += nbytes;
	} while (nbytes);

	if (size) {
		if (bufsize)
			*bufsize = size;
		return buffer;
	}

	free(buffer);
	return NULL;
}

static void
usage (void)
{
	fputs("Usage:\n"
		" salinfo_decode [-d] [-i pct] [-s pct] [-l limit] [-T filename] -t type -D directory\n"
		" salinfo_decode [-d] filename\n"
		"  -d              Increment debug level\n"
		"  -i pct          Drop records if filesystem has more than pct inodes used\n"
		"  -s pct          Drop records if filesystem has more than pct space used\n"
		"  -l limit        Drop records if more than limit records per minute\n"
		"  -T filename     Write a trigger line to filename for each SAL record\n"
		"  -t type         Type of record to wait for (cmc, cpe, mca or init)\n"
		"  -D directory    Directory to store the raw and decoded records\n"
		"  filename        Decode a saved raw record, without involving SAL\n"
		, stderr);
}

/* Ben Woodard of RedHat changed the kernel salinfo code around 2.6.9-rc3 to
 * clear the cpu state bit if there is no data.  He did not add any indication
 * to user space of this change, which means that user space must deduce if it
 * is running on a kernel with or without Ben Woodward's change :(.
 *
 * Start off by assuming we are running on a changed kernel, and do not write
 * 'clear' to the kernel when there is no data.  When running on an old kernel,
 * user space will then be invoked repeatedly with no data.  Detect this loop
 * for an old kernel and turn on do_clear.
 */

static int
clear_cpu(int fd_data, int cpu, const char *data_filename, int have_data)
{
	char text[400];
	int l;
	static int prev_cpu = -1, loop = 0, do_clear = 0;

	if (have_data)
		loop = 0;
	if (!do_clear) {
		if (cpu <= prev_cpu) {
			++loop;
			if (loop == 2)
				do_clear = 1;
		}
		prev_cpu = cpu;
	}
	if (!have_data && !do_clear)
		return 0;

	snprintf(text, sizeof(text), "clear %d\n", cpu);
	l = strlen(text);
	if (write(fd_data, text, l) != l) {
		fprintf(stderr, "%s: Error writing '%s' to %s\n",
			__FUNCTION__, text, data_filename);
		perror(data_filename);
		return 1;
	}
	return 0;
}

static int oemdata_fd[2];
static volatile int child_died;		/* lock free flag to detect child death */

static void
sig_chld (int sig)
{
	child_died = 1;
	oemdata_fd[1] = -1;
	wait(NULL);
}

/* See if this platform has supplied a program to decode oem data */
static int *
fork_oemdata(void)
{
	static const char pgm[] = "salinfo_decode_oem";
	int pid;
	int pp[2], pc[2];		/* parent writes to pp[1], child writes to pc[1] */
	if (pipe(pp) || pipe(pc)) {
		fprintf(stderr, "%s: pipe failed (%m), giving up\n", __FUNCTION__);
		exit(1);
	}
	signal(SIGCHLD, sig_chld);
	if ((pid = fork()) == 0) {
		/* child reads fd 0, writes fd 1 */
		if (dup2(pp[0], 0) < 0 || dup2(pc[1], 1) < 0) {
			fprintf(stderr, "%s: dup2 failed (%m), giving up\n", __FUNCTION__);
			exit(1);
		}
		close(pp[0]);
		close(pp[1]);
		close(pc[0]);
		close(pc[1]);
		execlp(pgm, pgm, NULL);
		exit(0);
	} else if (pid >= 0) {
		/* parent parent writes oemdata_fd[1], reads oemdata_fd[0] */
		close(pp[0]);
		close(pc[1]);
		oemdata_fd[0] = pc[0];	/* mca.c read from pgm */
		oemdata_fd[1] = pp[1];	/* mca.c write to pgm */
		__asm__ __volatile__ ("" ::: "memory");	/* lock free barrier */
		if (child_died)
			oemdata_fd[1] = -1;
		return oemdata_fd;
	} else {
		fprintf(stderr, "%s: fork failed (%m), giving up\n", __FUNCTION__);
		exit(1);
	}
}

/* Log the number of dropped records every LOG_DROPPED seconds.  The only time
 * that we want to test for the logging interval expiring is while we are
 * waiting for the kernel to provide a new SAL record, so disable the alarm
 * everywhere else.  This avoids having to check for the alarm except on the
 * read path.
 */

static void
disable_alarm(void)
{
	signal(SIGALRM, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	alarm_left = alarm(0);
}

static void
sig_handler(int sig)
{
	alarm_popped = 1;
	if (sig != SIGALRM)
		more_to_do = 0;
	disable_alarm();
}

static void
enable_alarm(void)
{
	signal(SIGHUP, sig_handler);
	signal(SIGALRM, sig_handler);
	alarm(alarm_left > 0 ? alarm_left : 1);
}

static void
count_dropped(struct threshold *t)
{
	++(t->dropped);
	if (first_dropped == 0)
		first_dropped = time(NULL);
}

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
#endif

static void
log_dropped_records(void)
{
	char log[2000], line[200], *ct, *p;
	int i, drop_total = 0;
	for (i = 0; i < ARRAY_SIZE(dropped_all); ++i)
		drop_total += dropped_all[i]->dropped;
	if (!drop_total && !trigger.dropped)
		return;
	ct = asctime(localtime(&first_dropped));
	first_dropped = 0;
	if ((p = index(ct, '\n')))
		*p = '\0';
	if (drop_total) {
		snprintf(log, sizeof(log), "%d %s record%s dropped since %s",
			 drop_total, type, drop_total == 1 ? "" : "s",
			 ct);
		for (i = 0; i < ARRAY_SIZE(dropped_all); ++i) {
			if (dropped_all[i]->dropped) {
				snprintf(line, sizeof(line), ", %d %s",
					 dropped_all[i]->dropped, dropped_all[i]->name);
				dropped_all[i]->dropped = 0;
				strncat(log, line, sizeof(log));
			}
		}
		syslog(LOG_NOTICE, "%s", log);
	}
	if (trigger.dropped) {
		snprintf(log, sizeof(log), "%d %s trigger record%s dropped since %s",
			 trigger.dropped, type, trigger.dropped == 1 ? "" : "s",
			 ct);
		trigger.dropped = 0;
		syslog(LOG_NOTICE, "%s", log);
	}
}

static int
open_trigger(void)
{
	int i, l, fd;
	char *p;
	free(trigger_options);
	l = 100 + strlen(directory) + strlen(trigger_filename);
	l += 3*debug;
	trigger_options = malloc(l);
	if (!trigger_options) {
		fprintf(stderr, "%s: cannot malloc trigger_options\n", prefix);
		++errors;
		return -1;
	}
	snprintf(trigger_options, l, "-t %s -D %s", type, directory);
	p = trigger_options + strlen(trigger_options);
	if (ipct.set) {
		snprintf(p, l - (p - trigger_options), " -i %d", ipct.value);
		p += strlen(p);
	}
	if (spct.set) {
		snprintf(p, l - (p - trigger_options), " -s %d", spct.value);
		p += strlen(p);
	}
	if (limit.set) {
		snprintf(p, l - (p - trigger_options), " -l %d", limit.value);
		p += strlen(p);
	}
	if (trigger_filename) {
		snprintf(p, l - (p - trigger_options), " -T %s", trigger_filename);
		p += strlen(p);
	}
	for (i = 0; i < debug; ++i) {
		snprintf(p, l - (p - trigger_options), " -d");
		p += 3;
	}
	len_trigger_options = p - trigger_options;
	fd = open(trigger_filename, O_WRONLY|O_CREAT|O_APPEND|O_NONBLOCK, S_IRUSR|S_IWUSR);
	if (fd < 0 && errno != ENXIO)
		fprintf(stderr, "%s: open %s failed %m\n", prefix, trigger_filename);
	return fd;
}

static void
close_trigger(void)
{
	if ((int)trigger.value >= 0) {
		close(trigger.value);
		trigger.value = -1;
	}
	free(trigger_options);
	trigger_options = NULL;
	len_trigger_options = 0;
}

static void
write_trigger(const char *basename)
{
	int l = strlen(basename), i, len;
	char *line;
	if ((int)trigger.value < 0) {
		trigger.value = open_trigger();
		if ((int)trigger.value < 0) {
			count_dropped(&trigger);
			return;
		}
	}
	/* open_trigger() sets trigger_options and len_trigger_options */
	len = l + len_trigger_options + 3;
	line = alloca(len);
	snprintf(line, len, "%s %s\n", basename, trigger_options);
	l = strlen(line);
	signal(SIGPIPE, SIG_IGN);
	while (l) {
		if ((i = write(trigger.value, line, l)) <= 0) {
			if (errno == ENXIO || errno == EPIPE || errno == EWOULDBLOCK) {
				count_dropped(&trigger);
				return;
			}
			fprintf(stderr, "%s: %s failed %m\n", prefix, __FUNCTION__);
			++errors;
			close_trigger();
			return;
		}
		l -= i;
	}
	signal(SIGPIPE, SIG_DFL);
}

/* Talk to /proc/sal/type/{event,data} to extract, save, decode and clear SAL
 * records.
 */
static int
talk_to_sal (void)
{
	sal_log_record_header_t *buffer;
	char event_filename[PATH_MAX], data_filename[PATH_MAX], text[200];
	int fd_event = -1, fd_data = -1, i, cpu, ret = 1;
	int *oemdata_fd = NULL;
	static const char *rd[] = { "raw", "decoded" };

	openlog(prefix, LOG_PID, LOG_DAEMON);

	for (i = 0; i < 2; ++i) {
		int fd;
		char filename[PATH_MAX];
		snprintf(filename, sizeof(filename), "%s/%s/.check", directory, rd[i]);
		if ((fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC)) < 0) {
			perror(filename);
			goto out;
		}
		close(fd);
		unlink(filename);
	}

	snprintf(event_filename, sizeof(event_filename), "/proc/sal/%s/event", type);
	if ((fd_event = open(event_filename, O_RDONLY)) < 0) {
		perror(event_filename);
		goto out;
	}
	snprintf(data_filename, sizeof(data_filename), "/proc/sal/%s/data", type);
	if ((fd_data = open(data_filename, O_RDWR)) < 0) {
		perror(data_filename);
		goto out;
	}

	oemdata_fd = fork_oemdata();

	/* Run until we are killed */
	while (more_to_do) {
		int i, l, fd, bufsize, suffix, ret;
		char filename[PATH_MAX], basename[PATH_MAX];
		enable_alarm();
		ret = read(fd_event, text, sizeof(text));
		if (alarm_popped || ret <= 0) {
			if (alarm_popped) {
				log_dropped_records();
				alarm_popped = 0;
				alarm_left = LOG_DROPPED;
				continue;
			}
			if (ret == 0)
				goto out;
			if (errno == EINTR)
				continue;
			perror(event_filename);
			goto out;
		}
		disable_alarm();
		if (sscanf(text, "read %d\n", &cpu) != 1) {
			fprintf(stderr, "%s: Unknown text '%s' from %s\n",
				__FUNCTION__, text, event_filename);
			goto out;
		}
		l = strlen(text);
		if (write(fd_data, text, l) != l) {
			fprintf(stderr, "%s: Error writing '%s' to %s\n",
				__FUNCTION__, text, data_filename);
			perror(data_filename);
			goto out;
		}
		if (!(buffer = salinfo_buffer(fd_data, &bufsize))) {
			if (clear_cpu(fd_data, cpu, data_filename, 0))
				goto out;
			continue;	/* event but no data is normal at boot */
		}

		if (ipct.set || spct.set) {
			struct statfs buf;
			if (statfs(directory, &buf) < 0) {
				fprintf(stderr, "%s: Error in statfs(%s) %m\n",
					__FUNCTION__, directory);
				perror(data_filename);
				goto out;
			}
			/* Round up the percentage calculations to match the values from df */
			if (ipct.set) {
				int used = ((buf.f_files - buf.f_ffree) * 100.0) / buf.f_files + 0.5;
				if (used >= ipct.value) {
					count_dropped(&ipct);
					clear_cpu(fd_data, cpu, data_filename, 1);
					continue;
				}
			}
			if (spct.set) {
				int used = ((buf.f_blocks - buf.f_bavail) * 100.0) / buf.f_blocks + 0.5;
				if (used >= spct.value) {
					count_dropped(&spct);
					clear_cpu(fd_data, cpu, data_filename, 1);
					continue;
				}
			}
		}

		if (limit.set) {
			/* Rate limiting is awkward.  At boot time there can be
			 * a backlog of SAL records in NVRAM, these are read at
			 * a faster rate than normal.  The timestamp in the SAL
			 * record is not reliable either, it comes from the OS
			 * and, if the OS had not started, can be zero.
			 *
			 * Do not apply rate limiting until at least five
			 * minutes after startup, to get past any initial boot
			 * records.
			 */
			struct timeval now;
			static struct timeval last;
			if (gettimeofday(&now, NULL) < 0) {
				fprintf(stderr, "%s: Error in gettimeofday() %m\n",
					__FUNCTION__);
				perror(data_filename);
				goto out;
			}
			if (now.tv_sec - boottime.tv_sec >= 5*60) {
				if (timerisset(&last)) {
					long usec = (now.tv_sec - last.tv_sec) * 1000000 +
						    (now.tv_usec - last.tv_usec);
					/* Instantaneous rate limiting */
					if (limit.value == 0 || usec <= (60 * 1000000 / limit.value)) {
						count_dropped(&limit);
						clear_cpu(fd_data, cpu, data_filename, 1);
						last = now;
						continue;
					}
				}
				last = now;
			}
		}

		for (suffix = 0; ; ++suffix) {
			snprintf(basename, sizeof(basename),
				"%02x%02x-%02x-%02x-%02x_%02x_%02x-cpu%d-%s.%d",
				buffer->timestamp.slh_century,
				buffer->timestamp.slh_year,
				buffer->timestamp.slh_month,
				buffer->timestamp.slh_day,
				buffer->timestamp.slh_hour,
				buffer->timestamp.slh_minute,
				buffer->timestamp.slh_second,
				cpu,
				type,
				suffix);
			snprintf(filename, sizeof(filename),
				"%s/raw/%s", directory, basename);
			if ((fd = open(filename, O_WRONLY|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR)) >= 0)
				break;
			if (errno != EEXIST) {
				perror(filename);
				goto out;
			}
			if ((fd = open(filename, O_RDONLY)) >= 0) {
				sal_log_record_header_t *buffer2;
				int bufsize2, duplicate;
				buffer2 = salinfo_buffer(fd, &bufsize2);
				duplicate = buffer2 &&
					    bufsize == bufsize2 &&
					    memcmp(buffer, buffer2, bufsize) == 0;
				free(buffer2);
				if (duplicate) {
					syslog(LOG_NOTICE,
					       "duplicate record dropped: %s",
					       basename);
					goto done;
				}
			}
		}

		for (l = 0; l < bufsize; ) {
			i = write(fd, (char *)buffer + l, bufsize - l);
			if (i <= 0) {
				perror(filename);
				goto out;
			}
			l += i;
		}
		fsync(fd);

		snprintf(filename, sizeof(filename),
			"%s/decoded/%s", directory, basename);
		fclose(stdout);
		if (!(stdout = fopen(filename, "a"))) {
			perror(filename);
			goto out;
		}
		fclose(stderr);
		if (!((stderr = fopen(filename, "a")))) {
			perror(filename);
			goto out;
		}

		printf("BEGIN HARDWARE ERROR STATE from %s on cpu %d\n", type, cpu);
		platform_info_print(buffer, 1, fd_data, cpu, oemdata_fd);
		printf("END HARDWARE ERROR STATE from %s on cpu %d\n", type, cpu);
		fflush(stdout);
		fsync(fileno(stdout));
		fclose(stdout);
		if (trigger.set)
			write_trigger(basename);
done:
		close(fd);
		if (clear_cpu(fd_data, cpu, data_filename, 1))
			goto out;
		free(buffer);
	}

out:
	if (oemdata_fd && oemdata_fd[1] > 0) {
		close(oemdata_fd[0]);
		close(oemdata_fd[1]);
		wait(NULL);
	}
	if (fd_event > 0)
		close(fd_event);
	if (fd_data > 0)
		close(fd_data);
	return ret;
}

/* Decode an existing raw file */
static int
decode_a_file (const char *filename)
{
	sal_log_record_header_t *buffer;
	int fd;
	int *oemdata_fd = NULL;
	if ((fd = open(filename, O_RDONLY)) < 0) {
		perror(filename);
		return 1;
	}
	if ((buffer = salinfo_buffer(fd, NULL))) {
		oemdata_fd = fork_oemdata();
		printf("BEGIN HARDWARE ERROR STATE from %s\n", filename);
		platform_info_print(buffer, 0, fd, -1, oemdata_fd);
		printf("END HARDWARE ERROR STATE from %s\n", filename);
	}
	if (oemdata_fd && oemdata_fd[1] > 0) {
		close(oemdata_fd[0]);
		close(oemdata_fd[1]);
		wait(NULL);
	}
	close(fd);
	free(buffer);
	return 0;
}

static void
optint(struct threshold *t)
{
	char *p;
	if (!*optarg) {
		fprintf(stderr, "%s: %s, no value supplied\n",
		       prefix, t->name);
		++errors;
		return;
	}
	t->value = strtoumax(optarg, &p, 0);
	if (*p) {
		fprintf(stderr, "%s: invalid character in %.2s %s\n",
		       prefix, t->name, optarg);
		++errors;
		return;
	}
	if (t->value > t->max) {
		fprintf(stderr, "%s: option %.2s %s exceeds maximum of %d\n",
		       prefix, t->name, optarg, t->max);
		++errors;
		return;
	}
	t->set = 1;
}

int main(int argc, char **argv)
{
	char *filename = NULL;
	int o, ret;
	prefix = argv[0];

	while ((o = getopt(argc, argv, "hdi:s:l:T:t:D:")) > 0) {
		switch (o) {
		case 'h':
			usage();
			return 0;
		case 'd':
			++debug;
			break;
		case 'i':
			optint(&ipct);
			break;
		case 's':
			optint(&spct);
			break;
		case 'l':
			optint(&limit);
			break;
		case 'T':
			trigger_filename = optarg;
			trigger.set = 1;
			break;
		case 't':
			type = optarg;
			break;
		case 'D':
			directory = optarg;
			break;
		default:
			usage();
			return 1;
		}
	}

	if (optind == argc-1)
		filename = argv[optind++];
	if (optind != argc ||
		argc == 1 ||
		(type && !directory) ||
		(!type && directory) ||
		(filename && type)  ||
		!(filename || type)) {
		usage();
		return 1;
	}
	if ((ipct.set || spct.set || limit.set || trigger.set) && !type) {
		usage();
		return 1;
	}
	if (limit.set) {
		if (gettimeofday(&boottime, NULL) < 0) {
			fprintf(stderr, "%s: Error in gettimeofday() %m\n",
				__FUNCTION__);
			++errors;
		}
	}
	if (errors)
		return 1;

	if (type) {
		if (trigger.set) {
			trigger.value = open_trigger();
			if (errors)
				return 1;
		}
		ret = talk_to_sal();
		if (trigger.set)
			close_trigger();
	} else
		ret = decode_a_file(filename);
	return ret;
}
