Fix broken metrics, make perf tests optional #33

Merged: 6 commits, Mar 8, 2024
README.md (38 changes: 24 additions, 14 deletions)

````diff
@@ -55,27 +55,37 @@ To get up and running quickly, use [examples/grafana-example.json](examples/grafana-example.json)
 Usage: status-cake-exporter [OPTIONS]
 
 Options:
-  --host TEXT            The host of the statuscake api.  [env var: HOST;
-                         default: https://api.statuscake.com/v1]
-  --api-key TEXT         API Key for the account.  [env var: API_KEY;
-                         required]
-  --tags TEXT            A comma separated list of tags used to filter tests
-                         returned from the api  [env var: TAGS]
-  --log-level TEXT       The log level of the application. Value can be one of
-                         {debug, info, warn, error}  [env var: LOG_LEVEL;
-                         default: info]
-  --port INTEGER         [env var: PORT; default: 8000]
-  --items-per-page TEXT  The number of items that the api will return on a
-                         page. This is a global option.  [env var:
-                         ITEMS_PER_PAGE; default: 25]
-  --help                 Show this message and exit
+  --host TEXT                     The host of the StatusCake API.  [env var:
+                                  HOST; default:
+                                  https://api.statuscake.com/v1]
+  --api-key TEXT                  API Key for the account.  [env var: API_KEY;
+                                  required]
+  --tags TEXT                     A comma separated list of tags used to
+                                  filter tests returned from the api  [env
+                                  var: TAGS]
+  --log-level TEXT                The log level of the application. Value can
+                                  be one of {debug, info, warn, error}  [env
+                                  var: LOG_LEVEL; default: info]
+  --port INTEGER                  [env var: PORT; default: 8000]
+  --enable-perf-metrics / --no-enable-perf-metrics
+                                  Enable the collection of test performance
+                                  times and expose as a metric. Warning - this
+                                  can cause additional usage of the statuscake
+                                  API and slow down runtime  [env var:
+                                  ENABLE_PERF_METRICS; default: no-enable-
+                                  perf-metrics]
+  --items-per-page TEXT           The number of items that the api will return
+                                  on a page. This is a global option.  [env
+                                  var: ITEMS_PER_PAGE; default: 25]
+  --help                          Show this message and exit.
 ```
 
 ## Metrics
 
 | Name | Type | Description |
 |------|------|-------------|
 | status_cake_test_info | Gauge | A basic listing of the tests under the current account. |
 | status_cake_test_status | Gauge | Current test status (1 is up, 0 is down) |
 | status_cake_test_uptime_percent | Gauge | Tests and their uptime percentage |
+| status_cake_test_performance | Gauge | Tests and their performance percentage |
````
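For a quick check that the fixed families are actually exposed, one can scrape the exporter and print every `status_cake_test_*` sample. A minimal sketch, assuming the exporter is already running on the default port 8000 and that `requests` and `prometheus_client` are installed:

```python
# Smoke test for the exporter's metric families (illustrative usage; assumes
# the exporter is serving on localhost:8000).
import requests
from prometheus_client.parser import text_string_to_metric_families

body = requests.get("http://localhost:8000/metrics", timeout=10).text

for family in text_string_to_metric_families(body):
    if family.name.startswith("status_cake_test"):
        for sample in family.samples:
            # Each sample carries its metric name, label dict, and value.
            print(sample.name, sample.labels, sample.value)
```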
status_cake_exporter/_status_cake.py (35 changes: 24 additions, 11 deletions)

```diff
@@ -84,7 +84,18 @@ def __paginate_response(
         params: DefaultPaginationParameters = {"page": 1, "limit": self.per_page}
         params = args | params if args else params
 
-        response = func(**params)
+        def __retry_backoff(func, **kwargs):
+            try:
+                return func(**kwargs)
+            except ApiException as e:
+                if e.status == 429:
+                    backoff = int(e.headers["x-ratelimit-reset"])
+                    logger.debug(f"Hit statuscake API rate limit. Waiting {backoff} seconds before retrying...")
+                    sleep(backoff)
+                    return __retry_backoff(func, **kwargs)
+                raise e
+
+        response = __retry_backoff(func, **params)
         metadata = response["metadata"]
         logger.debug(
             f"Received {metadata['total_count']} tests across {metadata['page_count']} page(s)"
@@ -94,7 +105,7 @@ def __paginate_response(
         while params["page"] < metadata["page_count"]:
             params["page"] += 1
             logger.debug(f"Fetching page {params['page']} of {metadata['page_count']}")
-            paged_response = func(**params)
+            paged_response = __retry_backoff(func, **params)
             data.extend(paged_response["data"])
 
             sleep(1)
```
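The retry helper above recurses with no upper bound, so an account that stays rate limited will keep waiting and retrying indefinitely. For comparison, here is an iterative sketch of the same 429 backoff with a retry cap; the `max_retries` guard and the `ApiException` import path are assumptions, not part of this PR:

```python
# Iterative variant of the 429 backoff above; max_retries is an illustrative
# guard so a stuck rate limit cannot retry forever or grow the call stack.
import time
from typing import Any, Callable

from statuscake import ApiException  # import path assumed from statuscake-py


def retry_backoff(func: Callable[..., Any], max_retries: int = 5, **kwargs: Any) -> Any:
    for attempt in range(max_retries + 1):
        try:
            return func(**kwargs)
        except ApiException as e:
            # Re-raise anything that is not a rate limit, or the final attempt.
            if e.status != 429 or attempt == max_retries:
                raise
            # StatusCake reports seconds until the limit resets in this header.
            wait = int(e.headers["x-ratelimit-reset"])
            time.sleep(wait)
```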
```diff
@@ -155,12 +166,13 @@ def list_maintenance_windows(self) -> list[dict[str, Any]]:
             logger.error(f"Error while fetching maintenance windows: {e}")
             raise e
 
-    def list_tests(self, tags: str = "") -> list[dict]:
+    def list_tests(self, tags: str = "", enable_perf_metrics: bool = False) -> list[dict]:
         """
         Returns a list of tests
 
         Args:
             tags: [str] A comma separated list of tags to filter by.
+            enable_perf_metrics: [bool] Enable collection of performance data.
 
         Returns:
             list[dict[str, Any]]
@@ -180,14 +192,15 @@ def list_tests(self, tags: str = "") -> list[dict]:
         )
 
         # Fetch the performance of each test and add it to the response
-        for test in response:
-            history = self.get_test_history(test["id"])
-            if history["data"]:
-                test["performance"] = history["data"][0]["performance"]
-            else:
-                logger.warning(f"No performance data found for test ID {test['id']}")
-
-        print(response)
+        if enable_perf_metrics:
+            for test in response:
+                history = self.get_test_history(test["id"])
+                if history["data"]:
+                    test["performance"] = history["data"][0]["performance"]
+                else:
+                    logger.warning(f"No performance data found for test ID {test['id']}")
+
+        logger.debug(response)
         return response
 
     # https://github.com/StatusCakeDev/statuscake-py/issues/8
```
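Gating the history lookups matters because they change the API cost profile: the paginated list call stays cheap, but each monitored test now adds one `get_test_history` round-trip, which is why the flag defaults to off. A rough sketch of the request count per scrape, with illustrative numbers:

```python
# Illustrative request arithmetic for one scrape, based on the code above:
# list_tests paginates at items_per_page, and enabling perf metrics adds one
# get_test_history call per test.
import math


def requests_per_scrape(n_tests: int, items_per_page: int = 25,
                        enable_perf_metrics: bool = False) -> int:
    list_calls = math.ceil(n_tests / items_per_page)
    history_calls = n_tests if enable_perf_metrics else 0
    return list_calls + history_calls


print(requests_per_scrape(100))                            # 4
print(requests_per_scrape(100, enable_perf_metrics=True))  # 104
```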
status_cake_exporter/_test_collector.py (29 changes: 23 additions, 6 deletions)

```diff
@@ -85,7 +85,7 @@ def transform(
             ),  # This is random but we get an ApiAttributeError if we don't do this.
             "test_name": i["name"],
             "test_url": i["website_url"],
-            "test_status_int": get_uptime_status(i["status"]),
+            "test_status_int": get_uptime_status(str(i["status"])),
             "test_uptime_percent": str(i["uptime"]),
             "maintenance_status_int": get_test_maintenance_status(
                 i["id"], tests_in_maintenance
@@ -104,7 +104,7 @@ def transform(
 class TestCollector(Collector):
     """The collector subclass responsible for gathering test metrics from the StatusCake API."""
 
-    def __init__(self, host: str, api_key: str, per_page: int, tags: str):
+    def __init__(self, host: str, api_key: str, per_page: int, tags: str, enable_perf_metrics: bool):
         """
         Args:
             host: [str] The host of the StatusCake API
@@ -116,6 +116,7 @@ def __init__(self, host: str, api_key: str, per_page: int, tags: str):
         self.api_key: str = api_key
         self.per_page: int = per_page
         self.tags: str = tags
        self.enable_perf_metrics: bool = enable_perf_metrics
 
     def collect(self):
         """
@@ -138,26 +139,42 @@ def collect(self):
         )
 
         logger.debug("Fetching uptime tests")
-        tests = statuscake.list_tests(self.tags)
+        tests = statuscake.list_tests(self.tags, self.enable_perf_metrics)
 
         metrics = transform(tests, tests_in_maintenance)
         if len(metrics) == 0:
             logger.info("There are no test metrics to publish.")
             return
 
+        # status_cake_test_info - gauge
+        info_labels = ["test_id", "test_name", "test_type", "test_url"]
         logger.info(f"Publishing {len(metrics)} test metric(s).")
         info_gauge = GaugeMetricFamily(
             "status_cake_test_info",
             "A basic listing of the tests under the current account.",
-            labels=list(metrics[0].keys()),
+            labels=info_labels,
         )
 
         for i in metrics:
-            info_gauge.add_metric(list(i.values()), float(i["test_status_int"]))
+            info_dict = {x: i[x] for x in info_labels}
+            # https://www.robustperception.io/why-info-style-metrics-have-a-value-of-1/
+            info_gauge.add_metric(list(info_dict.values()), 1.0)
 
         yield info_gauge
 
+        # status_cake_test_status - gauge
+        logger.info(f"Publishing {len(metrics)} status metric(s).")
+        status_gauge = GaugeMetricFamily(
+            "status_cake_test_status",
+            "Tests and their current status",
+            labels=["test_id"],
+        )
+
+        for i in metrics:
+            logger.debug(i)
+            status_gauge.add_metric([i["test_id"]], float(i["test_status_int"]))
+
+        yield status_gauge
+
         # status_cake_test_uptime_percent - gauge
         logger.info(f"Publishing {len(metrics)} uptime metric(s).")
         uptime_gauge = GaugeMetricFamily(
```
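Pinning `status_cake_test_info` to 1.0 and moving the status value onto its own `test_id`-keyed series follows the info-metric convention from the linked article: the info gauge exists only to attach descriptive labels, lean per-id series carry the actual values, and queries join the two. A sketch of the resulting shape, with an illustrative PromQL join in the comments (not code from this PR):

```python
# Sketch of the info-metric convention used above. In PromQL the two families
# can be joined roughly like:
#
#   status_cake_test_status
#     * on(test_id) group_left(test_name, test_url)
#   status_cake_test_info
from prometheus_client.core import GaugeMetricFamily


def demo_families():
    info = GaugeMetricFamily(
        "status_cake_test_info",
        "A basic listing of the tests under the current account.",
        labels=["test_id", "test_name", "test_type", "test_url"],
    )
    status = GaugeMetricFamily(
        "status_cake_test_status",
        "Tests and their current status",
        labels=["test_id"],
    )
    # The info metric is always 1; the label values are the payload.
    info.add_metric(["123", "homepage", "HTTP", "https://example.com"], 1.0)
    status.add_metric(["123"], 1.0)  # 1 = up, 0 = down
    return [info, status]
```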
status_cake_exporter/app.py (3 changes: 2 additions, 1 deletion)

```diff
@@ -30,6 +30,7 @@ def exporter(
         envvar="LOG_LEVEL",
     ),
     port: int = typer.Option(8000, envvar="PORT"),
+    enable_perf_metrics: bool = typer.Option(False, help="Enable the collection of test performance times and expose as a metric. Warning - this can cause additional usage of the statuscake API and slow down collection", envvar="ENABLE_PERF_METRICS"),
     items_per_page=typer.Option(
         25,
         help="The number of items that the api will return on a page. This is a global option.",
@@ -54,7 +55,7 @@ def exporter(
     start_http_server(port)
 
     logger.info("Registering collectors.")
-    test_collector = TestCollector(host, api_key, items_per_page, tags)
+    test_collector = TestCollector(host, api_key, items_per_page, tags, enable_perf_metrics)
     REGISTRY.register(test_collector)
 
     while True:
```
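The `--enable-perf-metrics / --no-enable-perf-metrics` pair in the README help output is generated by Typer itself: declaring a boolean option yields both flags plus the environment-variable fallback. A standalone sketch of that behavior (the script and its names are illustrative):

```python
# Minimal illustration of how Typer turns a boolean option into a
# --flag/--no-flag pair and reads a fallback from an environment variable.
import typer

app = typer.Typer()


@app.command()
def exporter(
    enable_perf_metrics: bool = typer.Option(
        False,
        help="Enable the collection of test performance times.",
        envvar="ENABLE_PERF_METRICS",
    ),
) -> None:
    # --enable-perf-metrics / --no-enable-perf-metrics are both accepted;
    # with neither given, ENABLE_PERF_METRICS (if set) decides, else False.
    typer.echo(f"perf metrics enabled: {enable_perf_metrics}")


if __name__ == "__main__":
    app()
```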