/* libgc API
   ---------

   See:

     https://hboehm.info/gc/gcinterface.html
     https://github.com/ivmai/bdwgc/blob/57ccbcc/include/gc/gc.h#L459

   The latter is more complete.

   libgc provides both upper-case, e.g. GC_MALLOC(), and lower-case, e.g.
   GC_malloc(), versions of many functions. It’s not totally clear to me what
   the separation principles are, though the vibe does seem to prefer the
   upper-case versions. We use the upper-case when available.

   Zeroing newly-allocated memory
   ------------------------------

   Because we use a lot of zero-terminated data structures, it would be nice
   for the allocation functions to just always return zeroed buffers. We also
   want to not require libgc, i.e., we want to still be able to use malloc(3)
   and realloc(3) under the hood. It’s easy to provide a zeroing
   malloc(3)-workalike, and we do, but as far as I can tell, it’s impossible
   to do so for realloc(3)-alike unless we either (1) maintain our own
   allocation size tracking or (2) use highly non-portable code. Neither of
   these seemed worth the effort and complexity.

   This is because, as it turns out, the length of an allocated buffer is a
   more complicated notion than it seems. A buffer has *two* different
   lengths: L1 is the size requested by the original caller, and L2 is the
   size actually allocated; L2 ≥ L1. Neither is reliably available:

     * L1: The allocator can’t provide it, and while the caller had it at the
       time of previous allocation, it might not have kept it.

     * L2: Not available from the libc allocator without fairly extreme
       non-portability and/or difficult constraints [1], though libgc does
       provide it with GC_size(). The caller never knew it.

   Suppose we call realloc() with a new length Lν, where Lν > L2 ≥ L1. To zero
   the new part of the buffer, we must zero (L1,Lν], or (L2,Lν] if we assume
   (L1,L2] are still zero from the initial malloc(), and leave prior bytes
   untouched. But we don’t know either L1 or L2 reliably, so we’re hosed,
   whether we call an upstream realloc() or malloc() an entirely new buffer,
   then memcpy(3).

   I suspect this is why libc provides calloc(3) but not an equivalent for
   realloc(3).

   [1]: https://stackoverflow.com/questions/1281686 */

#define _GNU_SOURCE
#include "config.h"

#include <string.h>
#include <unistd.h>

#ifdef HAVE_GC
#include <gc.h>
#endif

#include "all.h"


/** Macros **/

/** Types **/

/** Constants **/

/** Function prototypes (private) **/

/* Convert a byte count to kilobytes. Defined in this file and used by
   mem_log() when formatting sizes. */
ssize_t kB(ssize_t byte_ct);

/* Declared here only when libgc is in use but HAVE_GC_SET_MARKERS_COUNT is
   not defined; a replacement definition is provided in this file under the
   same condition. */
#if defined(HAVE_GC) && !defined(HAVE_GC_SET_MARKERS_COUNT)
void GC_set_markers_count(unsigned int ct);
#endif


/** Globals **/

/* Note: All the memory statistics are signed “ssize_t” rather than the more
   correct unsigned “size_t” so that subtractions are less error-prone (we
   report lots of differences). We assume that memory usage is small enough
   for this to not matter. */

/* Size in bytes of the stack, heap, and anonymous mmap(2) mappings at the
   previous mem_log() call. mem_log() reports the change relative to these
   values and then overwrites them. They start at zero, so the first call
   reports its full totals as the change. */
ssize_t stack_prev = 0;
ssize_t heap_prev = 0;
ssize_t anon_prev = 0;

#ifdef HAVE_GC

/* Note: The first four counters are from GC_prof_stats_s fields and have the
   corresponding names. Total size of allocated blocks is derived. See gc.h.

   Like the counters above, each holds the value seen at the previous
   mem_log() call and is overwritten by mem_log() after it is reported. */

/* Total size of the heap. This includes “unmapped” bytes that libgc is
   tracking but has given back to the OS, I assume to be re-requested from the
   OS if needed. */
ssize_t heapsize_prev = 0;

/* Free bytes in the heap, both mapped and unmapped. */
ssize_t free_prev = 0;

/* Unmapped bytes (i.e., returned to the OS but still tracked by libgc) in the
   heap. */
ssize_t unmapped_prev = 0;

/* Number of garbage collections done as of the previous mem_log() call. */
ssize_t gc_no_prev = 0;

/* Total time spent doing garbage collection, in milliseconds, as of the
   previous mem_log() call. Corresponds to GC_get_full_gc_total_time(). Note
   that because ch-run is single-threaded, we do not report time spent
   collecting with the world stopped. */
long time_collecting_prev = 0;

#endif


/** Functions **/

/** Replacement for GC_set_markers_count() in old libgc.

    Compiled only when libgc is in use but HAVE_GC_SET_MARKERS_COUNT is not
    defined. Instead of calling into libgc, it sets the GC_MARKERS environment
    variable to "1", overwriting any existing value; presumably libgc reads
    this variable when it initializes, so call this before GC_INIT().

    ct must be 1, the only value this program uses; anything else fails the
    T__ check. Failure of setenv(3) is also treated as an error rather than
    silently leaving the marker count unset. */
#if defined(HAVE_GC) && !defined(HAVE_GC_SET_MARKERS_COUNT)
void GC_set_markers_count(unsigned int ct)
{
   T__ (ct == 1);  // only value we call it with
   // setenv(3) returns zero on success and -1 with errno set on failure
   // (e.g., ENOMEM).
   Z_e (setenv("GC_MARKERS", "1", true));
}
#endif

/* Test whether every byte of a buffer is zero.

   buf points to the buffer and size is its length in bytes. Returns true if
   all size bytes are zero and false as soon as a non-zero byte is found. A
   zero-length buffer is considered all zeros, and buf is not dereferenced in
   that case. */
bool buf_zero_p(void *buf, size_t size)
{
   const char *byte = buf;

   for (size_t left = size; left > 0; left--) {
      if (*byte != 0)
         return false;
      byte++;
   }

   return true;
}

/* No-op with the same signature as free(3). It exists only so it can be
   handed to libraries that let us hook allocation and de-allocation, e.g.
   cJSON; do not call it directly. The argument is ignored and nothing is
   released. */
void free_ch(void *p)
{
   (void)p;  // deliberately unused
}

/* Wrapper for fork(2). Returns the child's PID in the parent and 0 in the
   child. A failed fork(2) is handled by the Tfe check below rather than
   being returned to the caller, so callers never see a negative value.

   Before forking, this logs memory usage and then runs an aggressive garbage
   collection so that the child is as small as possible. */
pid_t fork_ch(void)
{
   pid_t pid;

   // Log first, then collect; garbageinate() logs again after collecting.
   mem_log("fork");
   garbageinate("fkgc");

   // Lift the macro block on fork for this one call, then restore it.
#undef fork
   pid = fork();
   Tfe (pid >= 0, "can't fork");
#define fork FN_BLOCKED

   return pid;
}

/* Run a maximum-effort garbage collection and then log memory usage, tagging
   the log lines with the string when. If not built with libgc, this function
   does nothing at all (it does not log either). */
void garbageinate(const char *when)
{
#ifdef HAVE_GC
   GC_gcollect_and_unmap();
   mem_log(when);
#else
   (void)when;  // nothing to collect, nothing to log
#endif
}

/* Return byte_ct expressed in kilobytes (units of 1024 bytes). The count is
   signed, so differences may be passed in; the division truncates toward
   zero, e.g. both 1023 and -1023 bytes give 0. */
ssize_t kB(ssize_t byte_ct)
{
   const ssize_t bytes_per_kB = 1024;

   return byte_ct / bytes_per_kB;
}

/* Allocate a new buffer of size bytes and return it. The buffer's initial
   contents are undefined. Allocation failure is caught by the T_e check, so
   the returned pointer is never NULL.

   pointerful says whether the buffer may ever hold pointers. If false, the
   caller guarantees that no pointer will be stored in it, which allows
   garbage collection optimizations. If unsure, say true. Without libgc the
   flag is ignored. */
#undef malloc
void *malloc_ch(size_t size, bool pointerful)
{
   void *buf;

#ifdef HAVE_GC
   if (pointerful)
      buf = GC_MALLOC(size);
   else
      buf = GC_MALLOC_ATOMIC(size);
#else
   (void)pointerful;  // suppress warning
   buf = malloc(size);
#endif

   T_e (buf);
   return buf;
}
#define malloc FN_BLOCKED

/* malloc(3)-compatible front end to malloc_ch(), for libraries that let us
   hook memory allocation and de-allocation, e.g. cJSON. Since such callers
   cannot say what they will store, the buffer is always allocated as
   pointerful. Prefer calling malloc_ch() directly. */
void *malloc_pointerful(size_t size)
{
   const bool pointerful = true;

   return malloc_ch(size, pointerful);
}

/* Allocate a buffer of size bytes with malloc_ch() and fill it with zeros
   before returning it. pointerful is passed through to malloc_ch()
   unchanged. */
void *malloc_zeroed(size_t size, bool pointerful)
{
   // memset(3) returns its first argument, i.e., the newly allocated buffer.
   return memset(malloc_ch(size, pointerful), 0, size);
}

/* Shut down memory management. At present the only action is a final
   mem_log() call tagged "exit". */
void mem_exit(void)
{
   static const char when[] = "exit";

   mem_log(when);
}

/* Initialize memory management. We don’t log usage here because it’s called
   before logging is up.

   Without libgc this function does nothing. With libgc it sets the marker
   count to 1, initializes the collector, and starts libgc’s performance
   measurement, in that order. */
void mem_init(void)
{
#ifdef HAVE_GC
   // Multi-threaded GC causes unshare(2) to fail (#2027), so we turn off
   // threading. It’s a small program and we probably don’t care about any
   // performance boost. Alternatives:
   //
   //   1. Stop and restart the marker threads around unshare(2). I couldn’t
   //      figure out how to do this.
   //
   //   2. End garbage collection before unshare(2). However, we do things
   //      afterwards that probably should be garbage collected, and in any
   //      case GC_deinit() before unshare(2) segfaults.
   GC_set_markers_count(1);
   GC_INIT();
   // Presumably what makes GC_get_full_gc_total_time(), which mem_log()
   // reports, return meaningful values.
   GC_start_performance_measurement();
#endif
}

/* Log stack and heap memory usage, and GC statistics if enabled, via DEBUG()
   and also to syslog if enabled.

   when is a short tag included in every line to identify the call site.

   Each figure is reported in kB along with its change since the previous
   call; the previous values live in the *_prev globals, which this function
   updates. With libgc, two further lines report heap statistics from
   GC_get_prof_stats() and the collection count and total collection time. */
void mem_log(const char *when)
{
   FILE *fp;
   char *line = NULL;
   char *s;
   ssize_t stack_len = 0, heap_len = 0, anon_len = 0;
   ssize_t total_len, total_prev;
#ifdef HAVE_GC
   struct GC_prof_stats_s ps;
   ssize_t used, used_prev;
   long time_collecting;
#endif

   /* Compute stack, heap, and anonymous mapping sizes. While awkward, AFAICT
      this is the best available way to get these sizes. See proc_pid_maps(5).
      Whitespace-separated (?) fields:

        1. start (inclusive) and end (exclusive) addresses, in hex
        2. permissions, e.g. “r-xp”; the last character is “p” (private) or
           “s” (shared)
        3. offset, in hex
        4. device major:minor, in hex?
        5. inode number, in decimal
        6. pathname */
   T_e (fp = fopen("/proc/self/maps", "r"));
   while ((line = getdelim_ch(fp, '\n'))) {
      int conv_ct;
      void *start, *end;
      char path[8] = { 0 };  // length must match format string!
      // The permissions scanset must include “s”. Otherwise a shared mapping
      // stops the parse right after the addresses, leaving path empty, and
      // the mapping is miscounted as anonymous.
      conv_ct = sscanf(line, "%p-%p %*[rwxsp-] %*x %*x:%*x %*u %7s",
                       &start, &end, path);
      if (conv_ct < 2) {     // will be 2 if path empty
         WARNING("please report this bug: can't parse map: %d: \"%s\"",
                 conv_ct, line);
         break;
      }
      // Subtract as char * because arithmetic on void * is a GNU extension.
      if (strlen(path) == 0)
         anon_len += (char *)end - (char *)start;
      else if (streq(path, "[stack]"))
         stack_len += (char *)end - (char *)start;
      else if (streq(path, "[heap]"))
         heap_len += (char *)end - (char *)start;
   }
   Z_e (fclose(fp));

   // log the basics
   total_len = stack_len + heap_len + anon_len;
   total_prev = stack_prev + heap_prev + anon_prev;
   s = asprintf_ch("mem: %s: "
         "%zdkB %+zd (stac %zdkB %+zd, heap %zdkB %+zd, anon %zdkB %+zd)",
         when,
         kB(total_len), kB(total_len - total_prev),
         kB(stack_len), kB(stack_len - stack_prev),
         kB(heap_len),  kB(heap_len - heap_prev),
         kB(anon_len),  kB(anon_len - anon_prev));
   stack_prev = stack_len;
   heap_prev = heap_len;
   anon_prev = anon_len;
   DEBUG("%s", s);
#ifdef ENABLE_SYSLOG
   syslog(SYSLOG_PRI, "%s", s);
#endif

#ifdef HAVE_GC
   // log GC stuff
   GC_get_prof_stats(&ps, sizeof(ps));
   time_collecting = GC_get_full_gc_total_time(); // ms
   used = ps.heapsize_full - ps.free_bytes_full;  // derived, see globals
   used_prev = heapsize_prev - free_prev;

   s = asprintf_ch("gc:  %s: "
         "%zdkB %+zd (used %zdkB %+zd, free %zdkB %+zd, unmp %zdkB %+zd)",
         when,
         kB(ps.heapsize_full),   kB(ps.heapsize_full - heapsize_prev),
         kB(used),               kB(used - used_prev),
         kB(ps.free_bytes_full), kB(ps.free_bytes_full - free_prev),
         kB(ps.unmapped_bytes),  kB(ps.unmapped_bytes - unmapped_prev));
   heapsize_prev = ps.heapsize_full;
   free_prev     = ps.free_bytes_full;
   unmapped_prev = ps.unmapped_bytes;
   DEBUG("%s", s);
#ifdef ENABLE_SYSLOG
   syslog(SYSLOG_PRI, "%s", s);
#endif

   // Collection count and total collection time. All four arguments are
   // passed as long to match the %ld conversions.
   s = asprintf_ch("gc:  "
         "%s: %ld collections (%+ld) in %ldms (%+ld)",
         when,
         (long)ps.gc_no, (long)(ps.gc_no - gc_no_prev),
         time_collecting, time_collecting - time_collecting_prev);
   gc_no_prev = ps.gc_no;
   time_collecting_prev = time_collecting;
   DEBUG("%s", s);
#ifdef ENABLE_SYSLOG
   syslog(SYSLOG_PRI, "%s", s);
#endif
#endif
}

/* Change the size of allocated buffer p to size bytes and return the
   (possibly moved) buffer. Like realloc(3), if p is NULL, then this function
   is equivalent to malloc_ch(). Unlike realloc(3), size may not be zero;
   this is enforced by the T__ check. Allocation failure is caught by the T_e
   check, so the returned pointer is never NULL.

   If size is greater than the existing buffer length, the initial content of
   new bytes is undefined. If size is less than the existing buffer length,
   this function may be a no-op; i.e., it may be impossible to shrink a
   buffer’s actual allocation.

   pointerful is as in malloc_ch(). If p is non-NULL, it must match the
   original allocation, though this is not validated; in that case the flag
   is not passed on to the underlying allocator at all. */
#undef realloc
void *realloc_ch(void *p, size_t size, bool pointerful)
{
   void *p_new;

   T__ (size > 0);

   if (p == NULL)
      // New allocation: go through malloc_ch() so pointerful is honored,
      // since there is no GC_REALLOC_ATOMIC().
      p_new = malloc_ch(size, pointerful);
   else {
#ifdef HAVE_GC
      p_new = GC_REALLOC(p, size);
#else
      p_new = realloc(p, size);
#endif
   }

   T_e (p_new);
   return p_new;
}
#define realloc FN_BLOCKED