From 8c907c48ca9114534c51cd15e6dc616b6b47f206 Mon Sep 17 00:00:00 2001 From: jdv Date: Thu, 4 Jun 2026 10:47:50 +0200 Subject: [PATCH] making capi rate limiting easier to find and warning in install docs --- crowdsec-docs/sidebarsUnversioned.ts | 5 +++ .../getting_started/installation/docker.mdx | 13 ++++++ .../unversioned/troubleshooting/intro.md | 1 + .../troubleshooting/issue_capi_403.md | 45 ++++++++++--------- .../troubleshooting/security_engine.mdx | 4 ++ 5 files changed, 48 insertions(+), 20 deletions(-) diff --git a/crowdsec-docs/sidebarsUnversioned.ts b/crowdsec-docs/sidebarsUnversioned.ts index a9e32d269..86283947b 100644 --- a/crowdsec-docs/sidebarsUnversioned.ts +++ b/crowdsec-docs/sidebarsUnversioned.ts @@ -811,6 +811,11 @@ const sidebarsUnversionedConfig: SidebarConfig = { id: "troubleshooting/security_engine", label: "Security Engine", }, + { + type: "doc", + id: "troubleshooting/capi_403", + label: "Central API 403 / Rate Limiting", + }, { type: "doc", id: "troubleshooting/remediation_components", diff --git a/crowdsec-docs/unversioned/getting_started/installation/docker.mdx b/crowdsec-docs/unversioned/getting_started/installation/docker.mdx index 701921f9c..d82fa0553 100644 --- a/crowdsec-docs/unversioned/getting_started/installation/docker.mdx +++ b/crowdsec-docs/unversioned/getting_started/installation/docker.mdx @@ -108,6 +108,19 @@ depends_on: - "reverse-proxy" ``` +##### Health checks + +If you add a health check, use `cscli lapi status` — not `cscli capi status`. The latter calls the Central API on every check and can trigger [CAPI rate limiting](/u/troubleshooting/capi_403) if the interval is short. + +```yaml +healthcheck: + test: ["CMD", "cscli", "lapi", "status"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 30s +``` + ## Environment variables You can find a full list of available environment variables in the [Docker image readme](https://github.com/crowdsecurity/crowdsec/blob/master/build/docker/README.md). 
diff --git a/crowdsec-docs/unversioned/troubleshooting/intro.md b/crowdsec-docs/unversioned/troubleshooting/intro.md index 77328e220..07d787862 100644 --- a/crowdsec-docs/unversioned/troubleshooting/intro.md +++ b/crowdsec-docs/unversioned/troubleshooting/intro.md @@ -22,6 +22,7 @@ If you received a health check alert from the CrowdSec Console, check out the [* ## Troubleshooting by Topic * [Security Engine Troubleshooting](/u/troubleshooting/security_engine) +* [Central API 403 / CAPI rate limiting](/u/troubleshooting/capi_403) * [Remediation Components Troubleshooting](/u/troubleshooting/remediation_components) * [CTI Troubleshooting](/u/troubleshooting/cti) diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_capi_403.md b/crowdsec-docs/unversioned/troubleshooting/issue_capi_403.md index fa053e479..3439ca7ad 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_capi_403.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_capi_403.md @@ -1,5 +1,5 @@ --- -title: Central API 403 (Forbidden) +title: Central API 403 / CAPI rate limiting id: capi_403 --- @@ -66,19 +66,27 @@ docker compose up Crash loops can trigger repeated logins, resulting in 403s. -#### 🛠️ Wait for ban expiry and reduce login frequency +#### 🛠️ Fix the root cause, then stop the service for 1 hour -Wait **1 hour** for the ban to expire, then ensure the engine is not repeatedly re-authenticating. +:::warning +**Stopping the service is required.** If CrowdSec keeps running while blocked, every retry resets the cooldown timer and prolongs the ban. You must stop it completely for at least 1 hour after fixing the root cause. +::: -If you run multiple instances behind the same NAT, consider using **one LAPI instance** or reducing reconnection frequency to avoid bursts. 
+```bash +sudo systemctl stop crowdsec +# fix the underlying issue, then wait 1 hour before starting again +sudo systemctl start crowdsec +``` -#### 🛠️ Stabilize the engine +For Docker: run `docker compose down`, fix the root cause, wait 1 hour, then run `docker compose up -d`. -Resolve the underlying crash or restart loop before retrying CAPI: +If you run multiple instances behind the same NAT, consider consolidating under [one LAPI instance](/u/user_guides/multiserver_setup). -```bash -sudo systemctl restart crowdsec -``` +### Health check calling `cscli capi status` too frequently + +Some third-party stacks configure a Docker health check that runs `cscli capi status` on a short interval. This authenticates against CAPI on every check and quickly exhausts the login threshold. + +See [Docker installation — Health checks](/u/getting_started/installation/docker#health-checks) for the recommended `cscli lapi status` health check configuration. ### Misconfiguration or multiple instances @@ -86,21 +94,18 @@ Running multiple instances from the same public IP can trigger rate limiting. ## Verify Resolution -After making changes: +After fixing the root cause and waiting 1 hour: -Restart or reload CrowdSec: `sudo systemctl restart crowdsec` +1. Start the service and check CAPI connectivity: -1. Check engine status: - ```bash - sudo cscli console status - ``` +```bash +sudo systemctl start crowdsec +sudo cscli capi status +``` -2. Check CAPI connectivity: - ```bash - sudo cscli capi status - ``` +If it returns `You can successfully interact with Central API (CAPI)`, the ban is lifted. -If CAPI returns 200/204 and your console status is OK, the 403 is resolved. +If you are still blocked, contact [security@crowdsec.net](mailto:security@crowdsec.net) with your source IP and relevant logs. 
## Known Issues diff --git a/crowdsec-docs/unversioned/troubleshooting/security_engine.mdx b/crowdsec-docs/unversioned/troubleshooting/security_engine.mdx index a41b26988..d44014b53 100644 --- a/crowdsec-docs/unversioned/troubleshooting/security_engine.mdx +++ b/crowdsec-docs/unversioned/troubleshooting/security_engine.mdx @@ -50,6 +50,10 @@ time="2024-01-08 14:08:22" level=info msg="You can successfully interact with Ce This command should **ONLY** be run on the parent node. ::: +:::warning +Do **not** use `cscli capi status` as a container health check — it authenticates against the Central API on every call and can trigger rate limiting. Use `cscli lapi status` instead. If `cscli capi status` returns `403 Forbidden`, see [Central API 403 / CAPI rate limiting](/u/troubleshooting/capi_403). +::: + ### How do I know if my setup is working correctly? Are some unparsed logs normal?