From f678fd3e9c61318010c1c19e7042a47a7f9e8d1c Mon Sep 17 00:00:00 2001 From: Nick Muerdter <12112+GUI@users.noreply.github.com> Date: Tue, 28 Jan 2025 22:52:19 -0700 Subject: [PATCH] Begin work on Trafficserver 10 upgrade. --- config/schema.cue | 14 +- .../utils/generate_runtime_config.lua | 1 - tasks/deps/trafficserver | 40 ++- .../etc/perp/trafficserver/rc.main.etlua | 5 +- .../{socks.config => jsonrpc.yaml} | 0 .../etc/trafficserver/logging.yaml.etlua | 6 +- .../etc/trafficserver/records.config.etlua | 229 ---------------- .../etc/trafficserver/records.yaml.etlua | 257 ++++++++++++++++++ 8 files changed, 293 insertions(+), 259 deletions(-) rename templates/etc/trafficserver/{socks.config => jsonrpc.yaml} (100%) delete mode 100644 templates/etc/trafficserver/records.config.etlua create mode 100644 templates/etc/trafficserver/records.yaml.etlua diff --git a/config/schema.cue b/config/schema.cue index 4bd0cae97..ebce9f343 100644 --- a/config/schema.cue +++ b/config/schema.cue @@ -143,8 +143,18 @@ import "path" storage: { size: string | *"256M" } - embedded_server_config: { - records: [...string] | *[] + records: { + cache: { + ram_cache: { + size: string | *"-1" + } + } + net: { + connections_throttle: uint | *30000 + max_requests_in: uint | *0 + max_connections_in: uint | *30000 + default_inactivity_timeout: uint | *86400 + } } } diff --git a/src/api-umbrella/utils/generate_runtime_config.lua b/src/api-umbrella/utils/generate_runtime_config.lua index e4def17ff..0f8eb4d4f 100644 --- a/src/api-umbrella/utils/generate_runtime_config.lua +++ b/src/api-umbrella/utils/generate_runtime_config.lua @@ -390,7 +390,6 @@ local function set_computed_config(config) -- since we will allow long-running streaming responses.. 
config["envoy"]["_route_timeout"] = "0s" config["trafficserver"]["_connect_attempts_timeout"] = math.min(5, config["nginx"]["proxy_connect_timeout"]) - config["trafficserver"]["_post_connect_attempts_timeout"] = math.min(5, config["trafficserver"]["_connect_attempts_timeout"]) config["trafficserver"]["_transaction_no_activity_timeout_out"] = config["nginx"]["proxy_read_timeout"] config["trafficserver"]["_transaction_no_activity_timeout_in"] = config["nginx"]["proxy_send_timeout"] config["nginx"]["_initial_proxy_connect_timeout"] = math.min(5, config["nginx"]["proxy_connect_timeout"]) diff --git a/tasks/deps/trafficserver b/tasks/deps/trafficserver index e29bc4130..d34c8b989 100755 --- a/tasks/deps/trafficserver +++ b/tasks/deps/trafficserver @@ -4,8 +4,8 @@ # https://github.com/apache/trafficserver/issues/10393 This issue has been # reproducible by running the following test in a loop: # bundle exec minitest test/apis/test_web_app_large_body.rb -n test_limit_is_configurable -trafficserver_version="9.1.4" -trafficserver_hash="f18aca66e444470738f6031366edc45db26942917e3fe8cba9fbb3b28173e01e06991aa9846e459047365946738931b1910719574c9c2afee72e4dfb30eab617" +trafficserver_version="10.0.2" +trafficserver_hash="5de65130d4c0997d619d0c1be6840aaffd3e17abd820d22b9294c4d78834eb71a58fed12f3166396c7169583850a8b42ed768e5af11e855c87b2fca8a10da7f3" set -e -u -x source ./tasks/helpers.sh @@ -15,30 +15,22 @@ task_working_dir download "https://archive.apache.org/dist/trafficserver/trafficserver-$trafficserver_version.tar.bz2" "sha512" "$trafficserver_hash" extract_download "trafficserver-$trafficserver_version.tar.bz2" -detect_os_release -if [[ "$ID_NORMALIZED" == "rhel" && "$VERSION_ID" == "7" ]]; then - export CC=/opt/rh/devtoolset-7/root/usr/bin/gcc - export CXX=/opt/rh/devtoolset-7/root/usr/bin/g++ -elif [[ "$ID" == "debian" && "$VERSION_ID" == "9" ]]; then - export CC=clang-7 - export CXX=clang++-7 - export CXXFLAGS="-I/usr/lib/llvm-7/include/c++/v1 -stdlib=libc++" -fi - cd 
"trafficserver-$trafficserver_version" -SPHINXBUILD=false ./configure \ - --disable-tests \ - --disable-silent-rules \ - --disable-maintainer-mode \ - --disable-dependency-tracking \ - --prefix="$INSTALL_PREFIX_EMBEDDED" \ - --with-jemalloc \ - --with-luajit="$STAGE_EMBEDDED_DIR/openresty/luajit" -make -j"$NPROC" -make install DESTDIR="$STAGE_DIR" -chrpath -d "$STAGE_EMBEDDED_DIR/lib/libtscore.so" -chrpath -d "$STAGE_EMBEDDED_DIR/lib/libtsmgmt.so" +PKG_CONFIG_PATH="$STAGE_EMBEDDED_DIR/openresty/luajit/lib/pkgconfig:${PKG_CONFIG_PATH:-}" \ +cmake -B build \ + -DCMAKE_INSTALL_PREFIX="$INSTALL_PREFIX_EMBEDDED" \ + -DENABLE_LUAJIT=ON \ + -DENABLE_JEMALLOC=ON + +cmake --build build -j "$NPROC" + +DESTDIR="$STAGE_DIR" cmake --install build + +chrpath -d "$STAGE_EMBEDDED_DIR/lib/libswoc"*.so +chrpath -d "$STAGE_EMBEDDED_DIR/lib/libtsapi.so" +chrpath -d "$STAGE_EMBEDDED_DIR/lib/libtscppapi.so" +chrpath -d "$STAGE_EMBEDDED_DIR/lib/libyaml-cpp.so" find "$STAGE_EMBEDDED_DIR/libexec/trafficserver/" -name "*.so" -exec chrpath -d {} \; find "$STAGE_EMBEDDED_DIR/bin/" -name "traffic_*" -exec chrpath -d {} \; diff --git a/templates/etc/perp/trafficserver/rc.main.etlua b/templates/etc/perp/trafficserver/rc.main.etlua index 0cc93d9a1..88724453f 100755 --- a/templates/etc/perp/trafficserver/rc.main.etlua +++ b/templates/etc/perp/trafficserver/rc.main.etlua @@ -36,7 +36,10 @@ if [ "${1}" = "start" ]; then ln -sf /dev/stderr "<%- config['log_dir'] %>/trafficserver/traffic.out" fi - exec runtool ${run_args[@]+"${run_args[@]}"} traffic_manager --nosyslog + # Disable freelist args to use jemalloc for memory management. 
+ # https://github.com/apache/trafficserver/issues/9162#issuecomment-1292274810 + # https://github.com/apache/trafficserver/issues/3354 + exec runtool ${run_args[@]+"${run_args[@]}"} traffic_server --disable_freelist --disable_pfreelist fi exit 0 diff --git a/templates/etc/trafficserver/socks.config b/templates/etc/trafficserver/jsonrpc.yaml similarity index 100% rename from templates/etc/trafficserver/socks.config rename to templates/etc/trafficserver/jsonrpc.yaml diff --git a/templates/etc/trafficserver/logging.yaml.etlua b/templates/etc/trafficserver/logging.yaml.etlua index 4bb8ff1a1..415f9a3c5 100644 --- a/templates/etc/trafficserver/logging.yaml.etlua +++ b/templates/etc/trafficserver/logging.yaml.etlua @@ -5,7 +5,9 @@ logging: # chi: Client: IP address of the client’s host. # caun: Client Request: Authentication User name # cqtn: Client Request: Client request timestamp - # cqtx: Client Request: HTTP client request text + # cqhm: Client Request: HTTP method + # pqu: Proxy Request: URL + # cqpv: Client Request: Request protocol and version # pssc: Proxy Response: HTTP response status code sent by Traffic Server proxy to the client. # pscl: Proxy Response: Content body length of the Traffic Server proxy response. # @@ -44,7 +46,7 @@ logging: # csssc: Cached Proxy: Response HTTP response status code of the origin server response, as cached by Traffic Server. # cqtr: Client Request: TCP reused status of the connection between the client and Traffic Server proxy, indicating whether the request was delivered through an already established connection. # cquuc: Client Request: Canonical (prior to remapping) effective URL from client request. 
- format: '[%] "%" % id=%<{X-Api-Umbrella-Request-ID}cqh> up_status=% time=% origin_time=% client_finish=% proxy_finish=% cache=% cache_sub=% cache_hit=% cache_write=% con_attempt=% ' + format: '[%] "% % %" % id=%<{X-Api-Umbrella-Request-ID}cqh> up_status=% time=% origin_time=% client_finish=% proxy_finish=% cache=% cache_sub=% cache_hit=% cache_write=% con_attempt=% ' logs: - mode: <%- (config["log"]["destination"] == "console") and "ascii_pipe" or "ascii" %> diff --git a/templates/etc/trafficserver/records.config.etlua b/templates/etc/trafficserver/records.config.etlua deleted file mode 100644 index bb80961a5..000000000 --- a/templates/etc/trafficserver/records.config.etlua +++ /dev/null @@ -1,229 +0,0 @@ -# vi: set ft=config : - -LOCAL proxy.local.incoming_ip_to_bind STRING <%- config["trafficserver"]["host"] %> -CONFIG proxy.config.http.server_ports STRING <%- config["trafficserver"]["port"] %> - -# Use jemalloc for memory management. -# https://github.com/apache/trafficserver/issues/9162#issuecomment-1292274810 -# https://github.com/apache/trafficserver/issues/3354 -CONFIG proxy.config.proxy_binary_opts STRING -M -f -F - -# Use the special "#-1" value to keep running as the current user (perp will -# handle switching the user). -CONFIG proxy.config.admin.user_id STRING #-1 - -# Specify where to find the TrafficServer binaries, plugins, etc. Our config -# files are in a non-default location by using TS_ROOT (this is done so that -# the config files can co-exist with the rest of the API Umbrella config files, -# and so that we can relocate the location for running tests). Therefore, we -# must explicitly point back to the original TrafficServer install directory. -# This could potentially be made cleaner without the use of TS_ROOT with the -# new proxy.config.config_dir setting -# (https://issues.apache.org/jira/browse/TS-3192), but I wasn't able to get -# that to quite work properly. 
-CONFIG proxy.config.bin_path STRING <%- config["_embedded_root_dir"] %>/bin -CONFIG proxy.config.plugin.plugin_dir STRING <%- config["_embedded_root_dir"] %>/libexec/trafficserver -CONFIG proxy.config.body_factory.template_sets_dir STRING <%- config["_embedded_root_dir"] %>/etc/trafficserver/body_factory - -# Log to the standard API Umbrella log directory location. -CONFIG proxy.config.log.logfile_dir STRING <%- config["log_dir"] %>/trafficserver - -# Log all diagnostic output to stderr (instead of also shipping it to the -# system-wide syslog). -CONFIG proxy.config.diags.output.diag STRING E -CONFIG proxy.config.diags.output.debug STRING E -CONFIG proxy.config.diags.output.status STRING E -CONFIG proxy.config.diags.output.note STRING E -CONFIG proxy.config.diags.output.warning STRING E -CONFIG proxy.config.diags.output.error STRING E -CONFIG proxy.config.diags.output.fatal STRING E -CONFIG proxy.config.diags.output.alert STRING E -CONFIG proxy.config.diags.output.emergency STRING E - -# Allow longer lines in logging. -CONFIG proxy.config.log.max_line_size INT 24576; -CONFIG proxy.config.log.ascii_buffer_size INT 24576; -CONFIG proxy.config.log.log_buffer_size INT 24576; - -# Disable Trafficserver's own log file rotation for these files. We'll either -# output to stdout or rotate log files with logrotate (to be consistent with -# all the other API Umbrella components). -CONFIG proxy.config.diags.logfile.rolling_enabled INT 0 -CONFIG proxy.config.output.logfile.rolling_enabled INT 0 - -<% if config["log"]["destination"] ~= "console" then %> - # If outputting to file, then logrotate will also be used for this file. - CONFIG proxy.config.log.rolling_enabled INT 0 -<% else %> - # If outputting to stdout, the error.log file can't be symlinked to - # stdout/stderr, so we still need to output to a log file, but enable - # Trafficserver's log rotation so these don't grow very big. We'll then use - # fluent-bit to tail these files and output to stdout for us. 
- # - # This workaround should no longer be needed once Trafficserver 10 is - # released and things can be configured to output to stdout/stderr directly: - # https://github.com/apache/trafficserver/pull/7937 - CONFIG proxy.config.log.auto_delete_rolled_files INT 1 - CONFIG proxy.config.log.max_space_mb_for_logs INT 30 - CONFIG proxy.config.log.max_space_mb_headroom INT 20 - - CONFIG proxy.config.log.rolling_enabled INT 2 - CONFIG proxy.config.log.rolling_size_mb INT 10 -<% end %> - -# Increase timeouts to match the timeouts in other pieces of the stack. -CONFIG proxy.config.http.connect_attempts_timeout INT <%- config["trafficserver"]["_connect_attempts_timeout"] %> -CONFIG proxy.config.http.post_connect_attempts_timeout INT <%- config["trafficserver"]["_post_connect_attempts_timeout"] %> -CONFIG proxy.config.http.transaction_no_activity_timeout_in INT <%- config["trafficserver"]["_transaction_no_activity_timeout_in"] %> -CONFIG proxy.config.http.transaction_no_activity_timeout_out INT <%- config["trafficserver"]["_transaction_no_activity_timeout_out"] %> - -# Disable other timeouts so that as long as the request is streaming back we -# don't close the connection and we defer to our other layers for timeouts. -CONFIG proxy.config.http.transaction_active_timeout_in INT 0 -CONFIG proxy.config.http.transaction_active_timeout_out INT 0 - -# Disable retries, since we're only connecting to Envoy. Envoy will be -# responsible for retrying connections if the connection is never established. -CONFIG proxy.config.http.connect_attempts_rr_retries INT 0 -CONFIG proxy.config.http.connect_attempts_max_retries INT 0 -CONFIG proxy.config.http.connect_attempts_max_retries_dead_server INT 0 -CONFIG proxy.config.http.connect.dead.policy INT 0 - -# Don't consider the backend down, even if some requests timeout. We don't want -# some bad API requests to mark the entire backend as being down. 
-CONFIG proxy.config.http.down_server.cache_time INT 0 - -# Don't append IP information to X-Forwarded-For or Client-IP headers, since -# we've already dealt with those at the first nginx proxy layer. -CONFIG proxy.config.http.insert_client_ip INT 0 -CONFIG proxy.config.http.insert_squid_x_forwarded_for INT 0 - -# Don't perform caching when the request contains cookies. -CONFIG proxy.config.http.cache.cache_responses_to_cookies INT 0 - -# Use read_while_writer to improve connection collapsing and prevent thundering -# herd scenarios for parallel requests to the same resource: -# https://docs.trafficserver.apache.org/en/7.1.x/admin-guide/configuration/cache-basics.en.html?highlight=thundering#reducing-origin-server-requests-avoiding-the-thundering-herd -# -# This is tuned to try and wait up to 2 seconds for the API response before -# making another request (this was the default in the old collapsed_connection -# plugin). -# -# Note that the retry delay is doubled on the 3rd retry onwards. So retries 1-2 -# will delay 50ms each, retries 3-21 will delay 100ms each (50*2 + 100*19 = -# 2000ms total wait). -# -# Note that read_while_writer helps for the most part, but there's still other -# edge-cases where some connections aren't collapsed. However, this is okay, -# since the current alternatives don't seem to be ideal for our uses, and -# read-while-writer does what we need. But for future references: -# -# - open_read_retry: For uncacheable responses, requests become serialized, so -# this should only ever be turned on if you know the responses will be -# cacheable (which we do not). -# - open_write_fail_action: Returning a stale copy could be appealing, but that -# doesn't currently work, so the only options are to return errors (which -# doesn't fit our use case). 
But if the stale handling gets implemented, this -# could be worth revisiting: -# https://issues.apache.org/jira/browse/TS-4774 -# https://issues.apache.org/jira/browse/TS-4238 -# https://issues.apache.org/jira/browse/TS-4237 -# - collapsed_forwarding experimental plugin: Might cause blocking (not -# entirely sure), but also seems to have some issues for our use-case: -# Returns 303s to user: -# https://lists.apache.org/thread.html/aa26070c52c907182bb104696bfd28d92147c0ba3082468487322b43@%3Cusers.trafficserver.apache.org%3E -# Requires enabling redirect following, which we don't want for other -# responses: -# https://lists.apache.org/thread.html/0eff5d9a53ef8fdf28be341f648c708bd651ad1208cddf71d532d78d@%3Cusers.trafficserver.apache.org%3E -CONFIG proxy.config.cache.enable_read_while_writer INT 1 -CONFIG proxy.config.cache.read_while_writer.max_retries INT 21 -CONFIG proxy.config.cache.read_while_writer_retry.delay INT 50 -CONFIG proxy.config.http.background_fill_active_timeout INT 0 -CONFIG proxy.config.http.background_fill_completed_threshold FLOAT 0.0 -CONFIG proxy.config.cache.max_doc_size INT 0 - -# Traffic Server uses this proxy_name in the Via response headers. By default, -# this is a hex-encoded IP address, but returning that information may not be -# desirable, so instead use a hard-coded name instead. -CONFIG proxy.config.proxy_name STRING api-umbrella - -# Include information in the Via response indicating whether this was a cache -# hit or not. -CONFIG proxy.config.http.insert_response_via_str INT 2 - -# Do not include the Via headers on the requests to the backend servers. 
This -# is for two reasons: -# - It may disable gzip responses from underlying API backend servers by -# default: https://community.akamai.com/community/web-performance/blog/2015/05/06/beware-the-via-header-disabled-compression-can-have-a-performance-impact -# - It prevents API Umbrella from making circular requests to itself as an API -# backend (which is admittedly an edge-case, but we had some usage of this): -# https://issues.apache.org/jira/browse/TS-2298 -CONFIG proxy.config.http.insert_request_via_str INT 0 - -# Don't include the TrafficServer version number in the Via headers. -CONFIG proxy.config.http.request_via_str STRING ApacheTrafficServer -CONFIG proxy.config.http.response_via_str STRING ApacheTrafficServer - -# Don't override the "Server" response header with the TrafficServer signature. -CONFIG proxy.config.http.response_server_enabled INT 0 - -# Retain the Host header we send to trafficserver for cache key purposes so -# that each host gets cached separately. -CONFIG proxy.config.url_remap.pristine_host_hdr INT 1 - -# Set the DNS nameservers used to potentially resolve a remote envoy layer. -CONFIG proxy.config.dns.resolv_conf STRING NULL -<% if config["dns_resolver"]["_nameservers_trafficserver"] then %> -CONFIG proxy.config.dns.nameservers STRING <%= config["dns_resolver"]["_nameservers_trafficserver"] %> -<% end %> - -<% if config["dns_resolver"]["allow_ipv6"] then %> -CONFIG proxy.config.hostdb.ip_resolve STRING ipv4;ipv6;only -<% else %> -CONFIG proxy.config.hostdb.ip_resolve STRING ipv4;only -<% end %> - -# Allow old DNS results to be used while new ones are fetched in the -# background. Since we're only resolving the possible remote envoy host at this -# layer, this just helps things remain up even if DNS goes down temporarily. -CONFIG proxy.config.hostdb.serve_stale_for INT 5184000 # 60 days - -# For hosts that don't exist, remember the invalid DNS responses, since we -# assume the Envoy resolution really shouldn't fail. 
-CONFIG proxy.config.hostdb.fail.timeout INT 5184000 # 60 days - -# Use more standard round-robin for DNS results (rather than sticky). -CONFIG proxy.config.hostdb.strict_round_robin INT 1 - -# Enable so_keepalive on the incoming and outgoing sockets to better detect -# keepalive hangups. -CONFIG proxy.config.net.sock_option_flag_in INT 3 -CONFIG proxy.config.net.sock_option_flag_out INT 3 - -# Keepalive connections to backend Envoy server. Retain some idle connections -# open to improve performance. -CONFIG proxy.config.http.keep_alive_no_activity_timeout_out INT <%= config["router"]["api_backends"]["keepalive_idle_timeout"] %> -CONFIG proxy.config.http.per_server.connection.min INT <%= config["router"]["api_backends"]["keepalive_connections"] %> - -# Since we're only connecting to our backend Envoy server, we can significantly -# reduce connections TrafficServer tries to open by ignoring the Host for -# session reuse (since Envoy will be responsible for actually connecting to the -# backend servers). -CONFIG proxy.config.http.server_session_sharing.match STRING ip - -<% if config["envoy"]["scheme"] == "https" then %> -CONFIG proxy.config.ssl.client.verify.server.properties STRING ALL -CONFIG proxy.config.ssl.client.verify.server.policy STRING ENFORCED -CONFIG proxy.config.ssl.client.CA.cert.path STRING <%- config["etc_dir"] %>/trafficserver -CONFIG proxy.config.ssl.client.CA.cert.filename STRING ca_cert.pem -CONFIG proxy.config.ssl.client.sni_policy STRING @<%- config["envoy"]["tls_certificate"]["domain"] %> -<% end %> - -# Enable for debug logging. 
-# CONFIG proxy.config.diags.debug.enabled INT 1 -# CONFIG proxy.config.diags.debug.tags STRING .* - -# Additional custom configuration -<% for _, record in ipairs(config["trafficserver"]["embedded_server_config"]["records"]) do %> - <%- record %> -<% end %> diff --git a/templates/etc/trafficserver/records.yaml.etlua b/templates/etc/trafficserver/records.yaml.etlua new file mode 100644 index 000000000..6bc2f57f9 --- /dev/null +++ b/templates/etc/trafficserver/records.yaml.etlua @@ -0,0 +1,257 @@ +records: + incoming_ip_to_bind: <%- json_encode(config["trafficserver"]["host"]) %> + + admin: + # Use the special "#-1" value to keep running as the current user (perp + # will handle switching the user). + user_id: "#-1" + + # Specify where to find the TrafficServer binaries, plugins, etc. Our config + # files are in a non-default location by using TS_ROOT (this is done so that + # the config files can co-exist with the rest of the API Umbrella config + # files, and so that we can relocate the location for running tests). + # Therefore, we must explicitly point back to the original TrafficServer + # install directory. This could potentially be made cleaner without the use + # of TS_ROOT with the new proxy.config.config_dir setting + # (https://issues.apache.org/jira/browse/TS-3192), but I wasn't able to get + # that to quite work properly. + bin_path: <%- json_encode(path_join(config["_embedded_root_dir"], "bin")) %> + body_factory: + template_sets_dir: <%- json_encode(path_join(config["_embedded_root_dir"], "etc/trafficserver/body_factory")) %> + plugin: + plugin_dir: <%- json_encode(path_join(config["_embedded_root_dir"], "libexec/trafficserver")) %> + + output: + logfile: + name: <%- json_encode(config["log"]["destination"] == "console" and "stdout" or "traffic.out") %> + + # Disable Trafficserver's own log file rotation for these files. 
We'll + # either output to stdout or rotate log files with logrotate (to be + # consistent with all the other API Umbrella components). + rolling_enabled: 0 + + diags: + logfile: + filename: <%- json_encode(config["log"]["destination"] == "console" and "stdout" or "diags.log") %> + + # Disable Trafficserver's own log file rotation for these files. We'll + # either output to stdout or rotate log files with logrotate (to be + # consistent with all the other API Umbrella components). + rolling_enabled: 0 + + # Log all diagnostic output to stderr (instead of also shipping it to the + # system-wide syslog). + output: + alert: E + debug: E + diag: E + emergency: E + error: E + fatal: E + note: E + status: E + warning: E + + # Enable for debug logging. + # debug: + # enabled: 1 + # tags: ".*" + + error: + logfile: + filename: <%- json_encode(config["log"]["destination"] == "console" and "stderr" or "error.log") %> + + log: + # Log to the standard API Umbrella log directory location. + logfile_dir: <%- json_encode(path_join(config["log_dir"], "trafficserver")) %> + + # Allow longer lines in logging. + ascii_buffer_size: 24576 + log_buffer_size: 24576 + max_line_size: 24576 + + # Disable Trafficserver's own log file rotation for these files. We'll + # either output to stdout or rotate log files with logrotate (to be + # consistent with all the other API Umbrella components). + rolling_enabled: 0 + + # Traffic Server uses this proxy_name in the Via response headers. By default, + # this is a hex-encoded IP address, but returning that information may not be + # desirable, so use a hard-coded name instead. 
+ proxy_name: api-umbrella + + net: + connections_throttle: <%- json_encode(config["trafficserver"]["records"]["net"]["connections_throttle"]) %> + max_requests_in: <%- json_encode(config["trafficserver"]["records"]["net"]["max_requests_in"]) %> + max_connections_in: <%- json_encode(config["trafficserver"]["records"]["net"]["max_connections_in"]) %> + default_inactivity_timeout: <%- json_encode(config["trafficserver"]["records"]["net"]["default_inactivity_timeout"]) %> + + # Enable so_keepalive on the incoming and outgoing sockets to better detect + # keepalive hangups. + sock_option_flag_in: 3 + sock_option_flag_out: 3 + + http: + server_ports: <%- json_encode(config["trafficserver"]["port"]) %> + + # Increase timeouts to match the timeouts in other pieces of the stack. + connect_attempts_timeout: <%- json_encode(config["trafficserver"]["_connect_attempts_timeout"]) %> + transaction_no_activity_timeout_in: <%- json_encode(config["trafficserver"]["_transaction_no_activity_timeout_in"]) %> + transaction_no_activity_timeout_out: <%- json_encode(config["trafficserver"]["_transaction_no_activity_timeout_out"]) %> + + # Disable other timeouts so that as long as the request is streaming back + # we don't close the connection and we defer to our other layers for + # timeouts. + transaction_active_timeout_in: 0 + transaction_active_timeout_out: 0 + + # Disable retries, since we're only connecting to Envoy. Envoy will be + # responsible for retrying connections if the connection is never established. + connect_attempts_rr_retries: 0 + connect_attempts_max_retries: 0 + connect_attempts_max_retries_down_server: 0 + connect: + down: + policy: 0 + + down_server: + # Don't consider the backend down, even if some requests timeout. We + # don't want some bad API requests to mark the entire backend as being + # down. + cache_time: 0 + + # Don't append IP information to X-Forwarded-For or Client-IP headers, since + # we've already dealt with those at the first nginx proxy layer. 
+ insert_client_ip: 0 + insert_squid_x_forwarded_for: 0 + + # Include information in the Via response indicating whether this was a + # cache hit or not. + insert_response_via_str: 2 + + # Do not include the Via headers on the requests to the backend servers. + # This is for two reasons: + # - It may disable gzip responses from underlying API backend servers by + # default: https://community.akamai.com/community/web-performance/blog/2015/05/06/beware-the-via-header-disabled-compression-can-have-a-performance-impact + # - It prevents API Umbrella from making circular requests to itself as an + # API backend (which is admittedly an edge-case, but we had some usage of + # this): https://issues.apache.org/jira/browse/TS-2298 + insert_request_via_str: 0 + + # Don't include the TrafficServer version number in the Via headers. + request_via_str: ApacheTrafficServer + response_via_str: ApacheTrafficServer + + # Don't override the "Server" response header with the TrafficServer signature. + response_server_enabled: 0 + + # Keepalive connections to backend Envoy server. Retain some idle + # connections open to improve performance. + keep_alive_no_activity_timeout_out: <%- json_encode(config["router"]["api_backends"]["keepalive_idle_timeout"]) %> + per_server: + connection: + min: <%- json_encode(config["router"]["api_backends"]["keepalive_connections"]) %> + + # Since we're only connecting to our backend Envoy server, we can significantly + # reduce connections TrafficServer tries to open by ignoring the Host for + # session reuse (since Envoy will be responsible for actually connecting to the + # backend servers). + server_session_sharing: + match: ip + + # For read_while_writer configuration in the `cache` section. + background_fill_active_timeout: 0 + background_fill_completed_threshold: 0.0 + + cache: + # Don't perform caching when the request contains cookies. 
+ cache_responses_to_cookies: 0 + + cache: + ram_cache: + size: <%- json_encode(config["trafficserver"]["records"]["cache"]["ram_cache"]["size"]) %> + + # Use read_while_writer to improve connection collapsing and prevent + # thundering herd scenarios for parallel requests to the same resource: + # https://docs.trafficserver.apache.org/en/10.0.x/admin-guide/configuration/cache-basics.en.html#reducing-origin-server-requests-avoiding-the-thundering-herd + # + # This is tuned to try and wait up to 2 seconds for the API response before + # making another request (this was the default in the old + # collapsed_connection plugin). + # + # Note that the retry delay is doubled on the 3rd retry onwards. So retries + # 1-2 will delay 50ms each, retries 3-21 will delay 100ms each (50*2 + + # 100*19 = 2000ms total wait). + # + # Note that read_while_writer helps for the most part, but there's still + # other edge-cases where some connections aren't collapsed. However, this + # is okay, since the current alternatives don't seem to be ideal for our + # uses, and read-while-writer does what we need. But for future references: + # + # - open_read_retry: For uncacheable responses, requests become serialized, + # so this should only ever be turned on if you know the responses will be + # cacheable (which we do not). + # - open_write_fail_action: Returning a stale copy could be appealing, but + # that doesn't currently work, so the only options are to return errors + # (which doesn't fit our use case). 
But if the stale handling gets + # implemented, this could be worth revisiting: + # https://issues.apache.org/jira/browse/TS-4774 + # https://issues.apache.org/jira/browse/TS-4238 + # https://issues.apache.org/jira/browse/TS-4237 + # - collapsed_forwarding experimental plugin: Might cause blocking (not + # entirely sure), but also seems to have some issues for our use-case: + # Returns 303s to user: + # https://lists.apache.org/thread.html/aa26070c52c907182bb104696bfd28d92147c0ba3082468487322b43@%3Cusers.trafficserver.apache.org%3E + # Requires enabling redirect following, which we don't want for other + # responses: + # https://lists.apache.org/thread.html/0eff5d9a53ef8fdf28be341f648c708bd651ad1208cddf71d532d78d@%3Cusers.trafficserver.apache.org%3E + max_doc_size: 0 + enable_read_while_writer: 1 + read_while_writer: + max_retries: 21 + read_while_writer_retry: + delay: 50 + + dns: + # Set the DNS nameservers used to potentially resolve a remote envoy layer. + resolv_conf: null + + <% if config["dns_resolver"]["_nameservers_trafficserver"] then %> + nameservers: <%- json_encode(config["dns_resolver"]["_nameservers_trafficserver"]) %> + <% end %> + + hostdb: + ip_resolve: <%- json_encode(config["dns_resolver"]["allow_ipv6"] and "ipv4;ipv6;only" or "ipv4;only") %> + + # Allow old DNS results to be used while new ones are fetched in the + # background. Since we're only resolving the possible remote envoy host at + # this layer, this just helps things remain up even if DNS goes down + # temporarily. + serve_stale_for: 5184000 # 60 days + + # For hosts that don't exist, remember the invalid DNS responses, since we + # assume the Envoy resolution really shouldn't fail. + fail: + timeout: 5184000 # 60 days + + # Use more standard round-robin for DNS results (rather than sticky). + strict_round_robin: 1 + + url_remap: + # Retain the Host header we send to trafficserver for cache key purposes so + # that each host gets cached separately. 
+ pristine_host_hdr: 1 + + <% if config["envoy"]["scheme"] == "https" then %> + ssl: + client: + verify: + server: + properties: ALL + policy: ENFORCED + CA: + cert: + path: <%- json_encode(path_join(config["etc_dir"], "trafficserver")) %> + filename: ca_cert.pem + sni_policy: <%- json_encode("@" .. config["envoy"]["tls_certificate"]["domain"]) %> + <% end %>