/*******************************************************************************
 subrequest.c - Apache subrequest utility functions

 Copyright (C) 2020-2026 CodeShop B.V.
 http://www.code-shop.com

 For licensing see the LICENSE file
******************************************************************************/

#include <http_core.h>
#include <http_config.h>
#include <http_log.h>
#include <http_request.h>
#include <apr_date.h>
#include <apr_strings.h>

#ifdef _WIN32
#define snprintf _snprintf
#endif

#include <output_bucket.h>
#include <headers_t.h>
#include <buckets_t.h>

#include <ap_dump.h>
#include <ap_log.h>
#include <subrequest.h>

// APACHE24 is defined when building against Apache httpd 2.4 or newer. In this
// file it guards the code that touches request_rec::kept_body.
#if AP_SERVER_MAJORVERSION_NUMBER > 2 || \
   (AP_SERVER_MAJORVERSION_NUMBER == 2 && AP_SERVER_MINORVERSION_NUMBER >= 4)
#define APACHE24
#endif

// Maximum number of times subreq_download() re-runs a subrequest that failed
// with the proxy keep-alive race (so at most MAX_PROXY_RETRIES + 1 attempts).
#define MAX_PROXY_RETRIES 2

// Report whether both mod_proxy and mod_proxy_http are linked into the server.
//
// The module lookup is done on the first call only; the answer is kept in a
// function-local static and returned directly on later calls.
//
// @return 1 if both modules were found, 0 otherwise.
static int proxy_available(void)
{
  // -1 means "not probed yet".
  static int cached = -1;

  if(cached != -1)
  {
    return cached;
  }

  int const have_proxy = ap_find_linked_module("mod_proxy.c") != NULL;
  int const have_both =
    have_proxy && ap_find_linked_module("mod_proxy_http.c") != NULL;

  cached = have_both ? 1 : 0;
  return cached;
}

// URI that marks an internal download subrequest. subreq_download() appends
// "?<escaped url>" to it, and subreq_translate() only acts on subrequests whose
// URI is exactly this string.
static char const subreq_prefix[] = "/usp_internal_subreq";

// translate_name hook that turns an internal download subrequest into a
// reverse-proxy request.
//
// The hook only acts when all of the following hold, and declines otherwise:
//  - r is a subrequest (r->main is set),
//  - r->uri equals subreq_prefix exactly,
//  - r->args is present (it carries the escaped target URL).
//
// On a match, the unescaped query string becomes the proxy target: r->filename
// is set to "proxy:<url>", the handler to "proxy-server", proxyreq to
// PROXYREQ_REVERSE, the query string is cleared and the "proxy-nocanon" note is
// set.
//
// @return DECLINED if the request is not ours, HTTP_BAD_GATEWAY if the proxy
//         modules are not available, OK once the request has been rewritten.
static int subreq_translate(request_rec* r)
{
  TRACE1_R(r, "subreq_translate: enter, subreq=%s hostname=%s uri=%s "
    "filename=%s args=%s", (r->main != NULL ? "yes" : "no"), r->hostname,
    r->uri, r->filename, r->args);

  // Main requests are never ours.
  if(r->main == NULL)
  {
    TRACE1_R(r, "subreq_translate: decline for main request");
    return DECLINED;
  }

  // Without a URI there is nothing to match against.
  if(r->uri == NULL)
  {
    TRACE1_R(r, "subreq_translate: decline, uri is NULL");
    return DECLINED;
  }

  // The URI has to be the internal marker, nothing more and nothing less.
  if(strcmp(r->uri, subreq_prefix))
  {
    TRACE1_R(r, "subreq_translate: decline for non-subreq URI %s", r->uri);
    return DECLINED;
  }

  // The target URL travels in the query string.
  if(r->args == NULL)
  {
    TRACE1_R(r, "subreq_translate: decline for URI %s without args", r->uri);
    return DECLINED;
  }

  if(!proxy_available())
  {
    ERROR_R(r, "subreq_translate: mod_proxy and mod_proxy_http not enabled, "
      "unable to handle %s", r->args);
    return HTTP_BAD_GATEWAY;
  }

  // ap_unescape_url() decodes in place, so work on a private copy of the args.
  // NOTE(review): the return value of ap_unescape_url() is ignored here;
  // confirm that a failed decode cannot occur for the args produced by
  // escape_args().
  char* target = apr_pstrdup(r->pool, r->args);
  ap_unescape_url(target);

  // Route the subrequest through the proxy layer.
  r->proxyreq = PROXYREQ_REVERSE;
  r->handler = "proxy-server";
  r->filename = apr_pstrcat(r->pool, "proxy:", target, NULL);
  r->args = NULL;
  apr_table_setn(r->notes, "proxy-nocanon", "1");
  DEBUG_R(r, "subreq_translate: rewritten to %s [OK]", r->filename);

  return OK;
}

// Name under which the subrequest output filter is registered, and the handle
// returned by that registration. The handle is assigned in subreq_init() and
// used by subreq_download() to attach the filter to each subrequest.
static const char subreq_filter_name[] = "UspSubReqFilter";
static ap_filter_rec_t* subreq_filter_handle = NULL;

// Output filter attached to download subrequests: captures the response body
// instead of sending it to the client.
//
// Every non-EOS bucket of the incoming brigade is read (blocking), and any data
// it yields is appended to the buckets_t passed as filter context (f->ctx) as a
// new heap bucket. The brigade is then emptied and nothing is passed on to the
// next filter.
//
// @param f      filter instance; f->ctx is the destination buckets_t.
// @param pbbIn  brigade produced by the subrequest.
// @return APR_SUCCESS, or the status of a failed apr_bucket_read(). In the
//         failure case the brigade is returned to the caller without cleanup.
static apr_status_t subreq_out_filter(ap_filter_t *f, apr_bucket_brigade *pbbIn)
{
  DEBUG_R(f->r, "subreq filter called");

  buckets_t* sink = (buckets_t*)f->ctx;
  apr_bucket* bkt = APR_BRIGADE_FIRST(pbbIn);

  while(bkt != APR_BRIGADE_SENTINEL(pbbIn))
  {
    // EOS carries no payload; everything else is read and captured.
    if(!APR_BUCKET_IS_EOS(bkt))
    {
      char const* payload;
      apr_size_t payload_len;

      apr_status_t rv = apr_bucket_read(bkt, &payload, &payload_len,
                                        APR_BLOCK_READ);
      if(rv != APR_SUCCESS)
      {
        ERROR_R(f->r, "subreq_out_filter: apr_bucket_read error %d", rv);
        return rv;
      }

      DEBUG_R(f->r, "subreq read=%" APR_SIZE_T_FMT " bytes", payload_len);

      if(payload_len != 0)
      {
        bucket_insert_tail(sink,
                           bucket_heap_create((uint8_t const *) payload,
                                              payload_len));
      }
    }

    // Advance only after the read, as the read may have changed the brigade.
    bkt = APR_BUCKET_NEXT(bkt);
  }

  // Everything has been captured; make sure the caller cannot hand us the same
  // buckets a second time.
  apr_brigade_cleanup(pbbIn);

  return APR_SUCCESS;
}

// Parse the Content-Range response header of r ("bytes <start>-<end>/<len>",
// where <len> may be "*").
//
// @param r                request whose headers_out is inspected.
// @param range_start      receives the first byte position.
// @param range_end        receives the last byte position (inclusive).
// @param instance_length  receives the complete length, or -1 when it is "*".
//
// The three outputs are always written: they are preset to 0, 0 and -1 and
// only carry meaningful values when 1 is returned.
//
// @return  1 if valid content-range,
//          0 if no content-range,
//         -1 if malformed content-range
static int dav_parse_range(request_rec *r,
                           apr_off_t *range_start, apr_off_t *range_end,
                           apr_off_t *instance_length)
{
  const char *range_c;
  char *range;
  char *first;
  char *dash;
  char *slash;
  char *errp;

  // Callers log these values regardless of the result, so never leave them
  // indeterminate.
  *range_start = 0;
  *range_end = 0;
  *instance_length = -1;

  range_c = apr_table_get(r->headers_out, "content-range");
  if (range_c == NULL)
  {
    /* no header */
    return 0;
  }

  // Work on a copy, since the separators are overwritten with terminators.
  range = apr_pstrdup(r->pool, range_c);
  if (strncasecmp(range, "bytes ", 6) != 0
      || (dash = ap_strchr(range + 6, '-')) == NULL
      || (slash = ap_strchr(range + 6, '/')) == NULL
      || dash > slash)
  {
    /* malformed header; this includes a '/' that precedes the '-' */
    return -1;
  }

  first = range + 6;
  *dash++ = *slash++ = '\0';

  // detect invalid ranges; an empty field (errp still at the start of the
  // field) is rejected rather than being read as 0
  if (apr_strtoff(range_start, first, &errp, 10)
      || errp == first || *errp || *range_start < 0)
  {
    return -1;
  }
  if (apr_strtoff(range_end, dash, &errp, 10)
      || errp == dash || *errp
      || *range_end < 0 || *range_end < *range_start)
  {
    return -1;
  }

  if (*slash == '*')
  {
    // complete length unknown
    *instance_length = -1;
  }
  else if (apr_strtoff(instance_length, slash, &errp, 10)
           || *errp || *instance_length <= *range_end)
  {
    // the complete length must be larger than the last byte position
    return -1;
  }

  // we now have a valid range
  return 1;
}

// Tell whether byte c has to be percent-encoded by escape_args().
//
// The table holds one bit per byte value, most significant bit first. The
// escaped set is: 0-31, space, '#', '%', '&', '+', ';', '?', 127 and 128-255.
static int arg_byte_needs_escape(uint8_t c)
{
  static uint8_t const escape_table[] =
  {
    // 0x00 - 0x1f
    0xff, 0xff, 0xff, 0xff, // 11111111 11111111 11111111 11111111
    //  !"#$%&' ()*+,-./ 01234567 89:;<=>?
    0x96, 0x10, 0x00, 0x11, // 10010110 00010000 00000000 00010001
    // @ABCDEFG HIJKLMNO PQRSTUVW XYZ[\]^_
    0x00, 0x00, 0x00, 0x00, // 00000000 00000000 00000000 00000000
    // `abcdefg hijklmno pqrstuvw xyz{|}~ and 0x7f
    0x00, 0x00, 0x00, 0x01, // 00000000 00000000 00000000 00000001
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, // 11111111 11111111 11111111 11111111
    0xff, 0xff, 0xff, 0xff, // 11111111 11111111 11111111 11111111
    0xff, 0xff, 0xff, 0xff, // 11111111 11111111 11111111 11111111
    0xff, 0xff, 0xff, 0xff, // 11111111 11111111 11111111 11111111
  };

  return (escape_table[c >> 3] & (0x80 >> (c & 7))) != 0;
}

// Percent-encode args so that it can be carried in a query string.
//
// Bytes selected by arg_byte_needs_escape() are written as "%XX" with upper
// case hex digits; all other bytes are copied unchanged.
//
// @param pool  pool the result is allocated from.
// @param args  NUL-terminated string to encode.
// @return newly allocated, NUL-terminated encoded string.
static char *escape_args(apr_pool_t* pool, char const* args)
{
  static const char hex_upper[] = "0123456789ABCDEF";
  char const* in;

  // First pass: compute the encoded length, starting at 1 for the terminator.
  size_t needed = 1;
  for(in = args; *in != '\0'; ++in)
  {
    needed += arg_byte_needs_escape((uint8_t)*in) ? 3 : 1;
  }

  // Second pass: encode into the freshly allocated buffer.
  char* result = apr_palloc(pool, needed);
  char* out = result;
  for(in = args; *in != '\0'; ++in)
  {
    uint8_t c = (uint8_t)*in;
    if(!arg_byte_needs_escape(c))
    {
      *out++ = *in;
      continue;
    }
    out[0] = '%';
    out[1] = hex_upper[(c >> 4) & 15];
    out[2] = hex_upper[c & 15];
    out += 3;
  }
  *out = '\0';

  return result;
}

int subreq_download(void* ctx, char const* url, uint64_t* offset, uint32_t* len,
                    uint64_t* size, headers_t* headers, buckets_t* buckets)
{
  int status = HTTP_OK;
  request_rec* r = (request_rec*)ctx;

  uint64_t request_offset = *offset;
  uint32_t request_len = *len;

#if 1
  DEBUG_R(r, "subreq uri=%s offset=%" APR_UINT64_T_FMT " len=%u size=%"
    APR_UINT64_T_FMT,
    url, *offset, *len, *size);
#endif

  if(!proxy_available())
  {
    ERROR_R(r, "mod_proxy and mod_proxy_http not enabled, unable to download "
      "%s via subrequest", url);
    return HTTP_BAD_GATEWAY;
  }

  const char* encoded = escape_args(r->pool, url);
  TRACE1_R(r, "urlencoded: %s", encoded);
  const char* sub_url = apr_pstrcat(r->pool, subreq_prefix, "?", encoded, NULL);

  TRACE1_R(r, "calling ap_sub_req_lookup_uri, sub_url=%s, r=%s", sub_url,
        dump_request_rec(r->pool, r));
  request_rec* sr = ap_sub_req_lookup_uri(sub_url, r, NULL);
  TRACE1_R(r, "called ap_sub_req_lookup_uri, sr=%s",
        dump_request_rec(r->pool, sr));
  if(sr == NULL)
  {
    DEBUG_R(r, "creating subrequest failed");
    return AP_FILTER_ERROR;
  }
  else if(!ap_is_HTTP_SUCCESS(sr->status))
  {
    DEBUG_R(r, "after creating subrequest, subreq status is %d", sr->status);
    ap_destroy_sub_req(sr);
    return AP_FILTER_ERROR;
  }

#ifdef APACHE24
  int added_kept_body = 0;
  if(sr->kept_body == NULL)
  {
    TRACE1_R(r, "calling apr_brigade_create on sr");
    sr->kept_body = apr_brigade_create(sr->pool, sr->connection->bucket_alloc);
    TRACE1_R(r, "called apr_brigade_create on sr=%s", dump_request_rec(r->pool, sr));
    added_kept_body = 1;
  }
  else
  {
    TRACE1_R(r, "kept_body already set in main request, not adding it");
  }
#endif

  // add Range header for byte range requests.
  //
  // len == UINT32_MAX && offset == 0          --> no range, get everything
  // len == UINT32_MAX && offset == UINT64_MAX --> no range, get everything
  // len == UINT32_MAX && offset != UINT64_MAX --> Range: bytes=${offset}-
  // len != UINT32_MAX && offset == UINT64_MAX --> Range: bytes=-${len}
  // len != UINT32_MAX && offset != UINT64_MAX --> Range: bytes=${offset}-${offset+len-1}
  char* range;
  if(*len == UINT32_MAX)
  {
    if(*offset == 0 || *offset == UINT64_MAX)
    {
      range = NULL;
    }
    else
    {
      range = apr_psprintf(sr->pool, "bytes=%" APR_UINT64_T_FMT "-", *offset);
    }
  }
  else
  {
    if(*offset == UINT64_MAX)
    {
      range = apr_psprintf(sr->pool, "bytes=-%u", *len);
    }
    else
    {
      range = apr_psprintf(sr->pool, "bytes=%" APR_UINT64_T_FMT "-%"
        APR_UINT64_T_FMT,
        *offset, *offset + *len - 1);
    }
  }
  if(range != NULL)
  {
    TRACE1_R(r, "setting Range: %s", range);
    apr_table_set(sr->headers_in, "Range", range);
  }
  else
  {
    TRACE1_R(r, "unsetting Range");
    apr_table_unset(sr->headers_in, "Range");
  }

  TRACE1_R(r, "calling ap_add_output_filter_handle");
  ap_filter_t* filter = ap_add_output_filter_handle(subreq_filter_handle,
                                                    buckets, sr, r->connection);
  TRACE1_R(r, "called ap_add_output_filter_handle, filter=%pp, sr->status=%d",
        filter, sr->status);
  if(filter == NULL)
  {
    DEBUG_R(r, "adding output filter failed");
    status = AP_FILTER_ERROR;
  }
  else if(!ap_is_HTTP_SUCCESS(sr->status))
  {
    DEBUG_R(r, "after adding output filter, subreq status is %d", sr->status);
    status = AP_FILTER_ERROR;
  }
  else if(sr->filename == NULL)
  {
    DEBUG_R(r, "after adding output filter, subreq filename is NULL");
    status = AP_FILTER_ERROR;
  }
  else
  {
    int retries = 0;
    for(;;)
    {
      TRACE1_R(r, "calling ap_run_sub_req, retries=%d, sr=%s", retries,
        dump_request_rec(r->pool, sr));
      int retcode = ap_run_sub_req(sr);
      TRACE1_R(r, "called ap_run_sub_req, retries=%d, retcode=%d, sr=%s", retries,
        retcode, dump_request_rec(r->pool, sr));

      if(retcode == OK)
      {
        status = sr->status;

        apr_off_t range_start;
        apr_off_t range_end;
        apr_off_t instance_length;
        int has_range = dav_parse_range(sr, &range_start, &range_end,
          &instance_length);

        DEBUG_R(r, "subreq has_range=%d, range_start=%" APR_OFF_T_FMT
          ", range_end=%" APR_OFF_T_FMT ", instance_length=%" APR_OFF_T_FMT,
          has_range, range_start, range_end, instance_length);
        if(has_range < 0)
        {
          /* RFC 2616 14.16: If we receive an invalid Content-Range we must
           * not use the content.
           */
          *offset = 0;
          *len = 0;
          *size = 0;
        }
        else if(has_range)
        {
          // Sanity check the returned range and buckets.
          apr_off_t range_size = range_end + 1 - range_start;
          if(range_size < 0 || range_size > UINT32_MAX)
          {
            ERROR_R(r, "invalid Content-Range %" APR_OFF_T_FMT "-%"
              APR_OFF_T_FMT " in reply", range_start, range_end);
            return HTTP_INTERNAL_SERVER_ERROR;
          }
          uint64_t actual_size = buckets_size(buckets);
          if(actual_size > UINT32_MAX)
          {
            ERROR_R(r, "buckets size %" APR_UINT64_T_FMT " is too large",
              actual_size);
            return HTTP_INTERNAL_SERVER_ERROR;
          }
          else if((uint64_t)range_size != actual_size)
          {
            ERROR_R(r, "buckets size %" APR_UINT64_T_FMT" does not match range "
              "size %" APR_OFF_T_FMT,
              actual_size, range_size);
            return HTTP_INTERNAL_SERVER_ERROR;
          }

          *offset = range_start;
          *len = (uint32_t)range_size;
          *size = instance_length < 0 ? UINT64_MAX : (uint64_t)instance_length;
          TRACE1_R(r, "with range, offset=%" APR_UINT64_T_FMT " len=%u size=%"
            APR_UINT64_T_FMT,
            *offset, *len, *size);
        }
        else
        {
          uint64_t actual_size = buckets_size(buckets);
          if(actual_size > UINT32_MAX)
          {
            ERROR_R(r, "buckets size %" APR_UINT64_T_FMT " is too large",
              actual_size);
            return HTTP_INTERNAL_SERVER_ERROR;
          }

          *offset = 0;
          *len = (uint32_t)actual_size;
          *size = *len;
          TRACE1_R(r, "no range, offset=%" APR_UINT64_T_FMT " len=%u size=%"
            APR_UINT64_T_FMT,
            *offset, *len, *size);
        }

        /* read the last-modified date; if the date is bad, then delete it */
        const char* lastmods = apr_table_get(sr->headers_out, "Last-Modified");
        if(lastmods != NULL)
        {
          apr_time_t lastmod = apr_date_parse_http(lastmods);
          if(lastmod != APR_DATE_BAD)
          {
            headers_set_updated_at(headers, lastmod);
          }
        }

        break;
      }

      // Ref: https://bz.apache.org/bugzilla/show_bug.cgi?id=37770
      //
      // In case the origin server closes a kept-alive connection because it
      // is idle, while the proxy worker is just sending the request, it will
      // result in a read error. The proxy worker then logs "AH01102: error
      // reading status line from remote server", and returns http status 502
      // (bad gateway).
      //
      // Unfortunately, the only way to detect this exact condition is to
      // inspect the request_req's "error-notes" entry, which is set by
      // ap_proxyerror(), and check if it contains the string "Error reading
      // from remote server".
      //
      // In any other case, it is a 'normal' proxy error, such as a DNS
      // lookup failure, or a socket error while reading the response.

      char const* error_notes = apr_table_get(sr->notes, "error-notes");
      if(retcode != HTTP_BAD_GATEWAY ||
        error_notes == NULL ||
        ap_strstr(error_notes, "Error reading from remote server") == NULL)
      {
        DEBUG_R(r, "ap_run_sub_req failed with retcode %d, error_notes %s",
          retcode, error_notes);
        status = retcode;
        break;
      }

      if(retries >= MAX_PROXY_RETRIES)
      {
        DEBUG_R(r, "ap_run_sub_req failed with retcode %d, even after %d retries",
          retcode, retries);
        status = retcode;
        break;
      }

      ++retries;
      WARNING_R(r, "proxy error due to keepalive race condition, retry %d", retries);
    }
  }

#ifdef APACHE24
  if(added_kept_body != 0 && sr->kept_body != NULL)
  {
    if(!APR_BRIGADE_EMPTY(sr->kept_body))
    {
      DEBUG_R(r, "kept_body brigade not empty before destruction");
    }
    TRACE1_R(r, "calling apr_brigade_destroy on sr->kept_body");
    apr_status_t apr_status = apr_brigade_destroy(sr->kept_body);
    sr->kept_body = NULL;
    TRACE1_R(r, "called apr_brigade_destroy on sr->kept_body, apr_status=%d",
      apr_status);
  }
#endif

  ap_destroy_sub_req(sr);

  if(!ap_is_HTTP_SUCCESS(status))
  {
    ERROR_R(r, "subreq uri=%s [%" APR_UINT64_T_FMT ",%u> bad status %d",
      url, request_offset, request_len, status);
  }
  else
  {
    DEBUG_R(r, "subreq uri=%s [%" APR_UINT64_T_FMT ",%u> got [%"
      APR_UINT64_T_FMT ",%u>",
      url, request_offset, request_len, *offset, *len);
  }

  return status;
}

// Register the hooks and filters used for download subrequests:
//  - subreq_out_filter as a resource-level output filter (its handle is kept
//    in subreq_filter_handle),
//  - subreq_translate as a translate_name hook.
void subreq_init(void)
{
  // Modules whose translate_name hooks have to run after ours: the subrequest
  // must be claimed before any rewrite rules are applied and before mod_proxy
  // kicks in.
  static const char* const successors[] =
  {
    "mod_proxy.c",
    "mod_rewrite.c",
    NULL
  };

  // subrequest output filter
  subreq_filter_handle = ap_register_output_filter(subreq_filter_name,
                                                   subreq_out_filter, NULL,
                                                   AP_FTYPE_RESOURCE);

  // URI-to-filename translation
  ap_hook_translate_name(subreq_translate, NULL, successors, APR_HOOK_FIRST);
}

// End Of File

