From 35e418a9c9097db833e5ceb1f7457ac95456daab Mon Sep 17 00:00:00 2001 From: sleepwithoutbz Date: Tue, 27 May 2025 22:07:38 +0800 Subject: [PATCH] =?UTF-8?q?analysis:=20=E8=BF=90=E8=A1=8C=E4=BA=86info=5Fr?= =?UTF-8?q?eveal=5Fai=5Fanalysis?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- info_reveal_ai_analysis.py | 10 +- info_reveal_result/analysis_result.log | 7912 ++++++++++++++++++++++++ 2 files changed, 7917 insertions(+), 5 deletions(-) create mode 100644 info_reveal_result/analysis_result.log diff --git a/info_reveal_ai_analysis.py b/info_reveal_ai_analysis.py index 8672d6e..7986a62 100644 --- a/info_reveal_ai_analysis.py +++ b/info_reveal_ai_analysis.py @@ -28,10 +28,10 @@ def analyze_with_ai(data: str) -> str: prompt = """ 你是一个计算机安全领域的专家,能够更具CVE描述分析出CVE产生于哪个程序、有何影响后果等信息。 下面我会发给你一段文本,第一行是CVE的编号与文件位置信息,后续是这个CVE的详细描述。 - 请分析我发送给你的CVE详细描述内容,并进行分析,我有三个分析要求: + 请分析我发送给你的CVE详细描述内容,并进行分析,我有3个分析要求: 1. 分析这个CVE信息是否与namespace、cgroup、container或者容器、隔离相关; - 2. 分析这个CVE是否会导致容器,例如Docker等,在运行时会影响其他容器或宿主机; - 3. 如果条件1、2全部不满足,直接返回“N/A”,不需要附带任何信息。 + 2. 
告诉我这是什么程序的漏洞,是内核Kernel,还是容器实现Docker,还是容器内部运行的应用,同时告诉我这个漏洞如何发生,它有何效果。 + 注意,如果这个CVE和namespace、cgroup、container或者容器、隔离无关,你只需返回"N/A"即可,无需任何多余信息。 """ client = OpenAI( @@ -61,7 +61,7 @@ if __name__ == "__main__": pattern = r"\./data/.*x/CVE.*?\.json" regex = re.compile(pattern) - with open("keyword_filter_11-25.log", "r", encoding="utf-8") as file: + with open("keyword_filter_21-25.log", "r", encoding="utf-8") as file: text = file.read() matches = list(regex.finditer(text)) @@ -79,4 +79,4 @@ if __name__ == "__main__": ans = analyze_with_ai(block) if ans == "N/A": continue - log.write(f"cve: {block}, analysis: {ans}\n\n") + log.write(f"cve: {block}\n\nanalysis: {ans}\n\n") diff --git a/info_reveal_result/analysis_result.log b/info_reveal_result/analysis_result.log new file mode 100644 index 0000000..7162e73 --- /dev/null +++ b/info_reveal_result/analysis_result.log @@ -0,0 +1,7912 @@ +cve: ./data/2021/20xxx/CVE-2021-20511.json +IBM Security Verify Access Docker 10.0.0 could allow a remote attacker to traverse directories on the system. An attacker could send a specially-crafted URL request containing "dot dot" sequences (/../) to view arbitrary files on the system. IBM X-Force ID: 198300. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,此 CVE 与容器相关,因为漏洞发生在 IBM Security Verify Access 的 Docker 环境中。 + +2. **程序漏洞分析**: + - **程序类型**:这是容器实现(Docker)中运行的应用程序(IBM Security Verify Access)的漏洞。 + - **漏洞发生原因**:应用程序未正确对用户输入进行验证和过滤,导致路径遍历漏洞。攻击者可以通过构造包含 `../` 的 URL 请求,绕过文件访问限制。 + - **效果**:攻击者可以查看系统上的任意文件,这可能泄露敏感信息或帮助攻击者进一步提升权限。虽然该漏洞本身不直接破坏容器隔离,但它可能间接影响容器的安全性,特别是如果泄露的信息暴露了容器主机的配置或凭据。 + +cve: ./data/2021/21xxx/CVE-2021-21284.json +In Docker before versions 9.03.15, 20.10.3 there is a vulnerability involving the --userns-remap option in which access to remapped root allows privilege escalation to real root. 
When using "--userns-remap", if the root user in the remapped namespace has access to the host filesystem they can modify files under "/var/lib/docker/" that cause writing files with extended privileges. Versions 20.10.3 and 19.03.15 contain patches that prevent privilege escalation from remapped user. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**:是的,该 CVE 信息与 namespace 和容器隔离机制密切相关,具体涉及用户命名空间(user namespace)的 remap 功能。 + +2. **漏洞所属程序及分析**: + - 这是 Docker 容器实现中的一个漏洞。 + - 漏洞发生的原因在于使用了 `--userns-remap` 选项时,Docker 未能正确限制 remapped root 用户在宿主机上的权限。如果容器内的 root 用户(实际上是被 remap 到宿主机上的非特权用户)能够访问宿主机的文件系统,他们可以修改 `/var/lib/docker/` 目录下的文件,从而以扩展的权限写入文件,最终导致从 remapped 用户到真实 root 用户的权限提升。 + - 效果:攻击者可以通过此漏洞在宿主机上获得真实的 root 权限,破坏容器的隔离性,对宿主机系统造成严重威胁。 + +总结:这是一个与用户命名空间(user namespace)相关的 Docker 漏洞,可能导致容器内的用户突破隔离并获取宿主机的 root 权限。 + +cve: ./data/2021/21xxx/CVE-2021-21285.json +In Docker before versions 9.03.15, 20.10.3 there is a vulnerability in which pulling an intentionally malformed Docker image manifest crashes the dockerd daemon. Versions 20.10.3 and 19.03.15 contain patches that prevent the daemon from crashing. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,这个CVE与容器相关,因为问题发生在Docker中,涉及恶意构建的Docker镜像manifest导致daemon崩溃。 + +2. **程序漏洞分析**: + - **程序**:这是Docker(容器实现)的漏洞。 + - **漏洞发生原因**:当使用Docker版本低于9.03.15或20.10.3时,拉取一个故意构造的恶意Docker镜像manifest会触发漏洞。 + - **效果**:该漏洞会导致Docker守护进程(dockerd daemon)崩溃,从而中断容器服务,可能造成拒绝服务(DoS)攻击。 + +cve: ./data/2021/21xxx/CVE-2021-21979.json +In Bitnami Containers, all Laravel container versions prior to: 6.20.0-debian-10-r107 for Laravel 6, 7.30.1-debian-10-r108 for Laravel 7 and 8.5.11-debian-10-r0 for Laravel 8, the file /tmp/app/.env is generated at the time that the docker image bitnami/laravel was built, and the value of APP_KEY is fixed under certain conditions. This value is crucial for the security of the application and must be randomly generated per Laravel installation. 
If your application's encryption key is in the hands of a malicious party, that party could craft cookie values using the encryption key and exploit vulnerabilities inherent to PHP object serialization / unserialization, such as calling arbitrary class methods within your application. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,这个 CVE 与容器相关。它涉及到 Bitnami 提供的 Laravel 容器镜像,其中 `/tmp/app/.env` 文件在构建 Docker 镜像时生成,并且 `APP_KEY` 的值在某些条件下是固定的。这表明问题发生在容器镜像构建阶段,可能影响容器化应用的安全性。 + +2. **漏洞所属程序及分析**: + - **程序**:这是 Laravel 应用程序在容器化环境中的漏洞,具体涉及 Bitnami 提供的 Laravel 容器镜像。 + - **漏洞发生原因**:在构建 Docker 镜像时,`/tmp/app/.env` 文件被生成,其中包含的 `APP_KEY` 值在某些条件下是固定的,而不是随机生成的。`APP_KEY` 是 Laravel 应用程序中用于加密和解密数据的关键值,其固定性可能导致安全风险。 + - **漏洞效果**:如果恶意攻击者获取了固定的 `APP_KEY`,他们可以利用该密钥伪造加密的 Cookie 值,并通过 PHP 对象序列化/反序列化的漏洞调用应用程序内的任意类方法,从而导致代码执行或其他安全问题。这种漏洞可能会影响容器内运行的应用程序的安全性,但不直接影响容器的隔离机制本身。 + +cve: ./data/2021/23xxx/CVE-2021-23133.json +A race condition in Linux kernel SCTP sockets (net/sctp/socket.c) before 5.12-rc8 can lead to kernel privilege escalation from the context of a network service or an unprivileged process. If sctp_destroy_sock is called without sock_net(sk)->sctp.addr_wq_lock then an element is removed from the auto_asconf_splist list without any proper locking. This can be exploited by an attacker with network service privileges to escalate to root or from the context of an unprivileged user directly if a BPF_CGROUP_INET_SOCK_CREATE is attached which denies creation of some SCTP socket. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,该 CVE 与 cgroup 和容器隔离相关。具体来说,问题涉及 BPF_CGROUP_INET_SOCK_CREATE,这是一个 cgroup v2 的功能,用于控制网络套接字的创建。如果一个 cgroup 策略禁止某些 SCTP 套接字的创建,未正确处理的竞态条件可能导致内核崩溃或权限提升。 + +2. 
**漏洞详情分析:** + - **程序类型:** 这是 Linux 内核 (Kernel) 的漏洞。 + - **漏洞发生位置:** 漏洞存在于 SCTP 套接字的实现中,具体文件为 `net/sctp/socket.c`。 + - **漏洞原因:** 在调用 `sctp_destroy_sock` 函数时,如果没有正确持有 `sock_net(sk)->sctp.addr_wq_lock` 锁,则会从 `auto_asconf_splist` 列表中移除元素,但此时没有适当的锁保护,导致竞态条件。 + - **影响后果:** 攻击者可以利用此竞态条件,通过以下两种方式提升权限: + 1. 如果攻击者具有网络服务权限,可以利用此漏洞直接进行内核权限提升至 root。 + 2. 如果一个 BPF_CGROUP_INET_SOCK_CREATE 策略被附加到 cgroup,并且该策略阻止某些 SCTP 套接字的创建,则非特权用户可以直接利用此漏洞进行权限提升。 + - **潜在风险:** 该漏洞可能破坏容器隔离,使得攻击者能够从受限的容器环境中逃脱并获得主机系统的更高权限。 + +cve: ./data/2021/23xxx/CVE-2021-23732.json +This affects all versions of package docker-cli-js. If the command parameter of the Docker.command method can at least be partially controlled by a user, they will be in a position to execute any arbitrary OS commands on the host system. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是容器实现Docker的漏洞,具体为`docker-cli-js`包中的漏洞。该漏洞发生在当用户能够部分控制`Docker.command`方法的命令参数时,攻击者可以利用此漏洞在主机系统上执行任意OS命令。其效果是破坏了容器的隔离性,使攻击者能够在宿主系统上执行恶意操作。 + +cve: ./data/2021/33xxx/CVE-2021-33183.json +Improper limitation of a pathname to a restricted directory ('Path Traversal') vulnerability container volume management component in Synology Docker before 18.09.0-0515 allows local users to read or write arbitrary files via unspecified vectors. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 该CVE描述中提到“container volume management component”,表明它与容器的卷管理组件相关,因此与容器技术直接相关。 + +2. **程序漏洞分析**: + - **程序**:这是 Synology Docker 的漏洞,而不是 Linux 内核或容器内部运行的应用。 + - **漏洞发生原因**:Synology Docker 的容器卷管理组件未能正确限制路径名,导致路径遍历(Path Traversal)漏洞。 + - **效果**:本地用户可以通过未指定的向量读取或写入任意文件。这种漏洞可能破坏容器的隔离性,允许攻击者访问宿主机上的敏感文件或修改关键配置,从而对系统安全造成严重影响。 + +cve: ./data/2021/37xxx/CVE-2021-37841.json +Docker Desktop before 3.6.0 suffers from incorrect access control. If a low-privileged account is able to access the server running the Windows containers, it can lead to a full container compromise in both process isolation and Hyper-V isolation modes. 
This security issue leads an attacker with low privilege to read, write and possibly even execute code inside the containers. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 该CVE与容器和隔离相关。 + +2. **程序漏洞分析**: + - **程序**:这是 Docker Desktop 的漏洞。 + - **漏洞发生原因**:Docker Desktop 在版本 3.6.0 之前存在访问控制不当的问题。低权限用户如果能够访问运行 Windows 容器的服务器,就可以利用该漏洞。 + - **效果**:攻击者可以完全控制容器(无论容器使用的是进程隔离还是 Hyper-V 隔离模式),从而实现对容器内文件的读取、写入,甚至可能执行代码。这表明容器的隔离机制被破坏,低权限用户可以突破隔离边界,影响容器的安全性。 + +cve: ./data/2021/38xxx/CVE-2021-38209.json +net/netfilter/nf_conntrack_standalone.c in the Linux kernel before 5.12.2 allows observation of changes in any net namespace because these changes are leaked into all other net namespaces. This is related to the NF_SYSCTL_CT_MAX, NF_SYSCTL_CT_EXPECT_MAX, and NF_SYSCTL_CT_BUCKETS sysctls. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 namespace 相关。具体来说,它涉及网络命名空间(net namespace)中的信息泄露问题。 + +2. **漏洞所属程序及影响分析**: + - 这是 Linux 内核(Kernel)的漏洞。 + - 漏洞发生的原因是 `nf_conntrack_standalone.c` 文件中对某些 sysctl 参数(如 `NF_SYSCTL_CT_MAX`、`NF_SYSCTL_CT_EXPECT_MAX` 和 `NF_SYSCTL_CT_BUCKETS`)的处理不当,导致在一个网络命名空间中的更改被泄露到其他网络命名空间。 + - 效果:攻击者可以利用此漏洞观察和获取其他网络命名空间中的连接跟踪(conntrack)信息,从而破坏命名空间之间的隔离性。这种信息泄露可能被进一步利用来实施更复杂的攻击,例如跨命名空间的流量分析或攻击规划。 + +cve: ./data/2021/3xxx/CVE-2021-3162.json +Docker Desktop Community before 2.5.0.0 on macOS mishandles certificate checking, leading to local privilege escalation. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是Docker Desktop Community程序的漏洞。该漏洞发生在macOS平台上的Docker Desktop Community版本低于2.5.0.0时,由于证书检查处理不当,可能导致本地提权(local privilege escalation)。攻击者可以利用此漏洞在容器环境中提升权限,从而突破容器的隔离机制,对宿主机或其他容器造成潜在威胁。 + +cve: ./data/2021/3xxx/CVE-2021-3493.json +The overlayfs implementation in the linux kernel did not properly validate with respect to user namespaces the setting of file capabilities on files in an underlying file system. 
Due to the combination of unprivileged user namespaces along with a patch carried in the Ubuntu kernel to allow unprivileged overlay mounts, an attacker could use this to gain elevated privileges. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是Linux内核(Kernel)的漏洞。漏洞发生在overlayfs的实现中,由于没有正确针对用户命名空间验证底层文件系统中文件功能的设置,导致问题。具体来说,当结合非特权用户命名空间以及Ubuntu内核中允许非特权overlay挂载的补丁时,攻击者可以利用此漏洞获得提升的权限。 + +效果:攻击者可以通过此漏洞获得更高的权限,可能突破容器隔离,影响宿主系统安全。 + +cve: ./data/2021/3xxx/CVE-2021-3602.json +An information disclosure flaw was found in Buildah, when building containers using chroot isolation. Running processes in container builds (e.g. Dockerfile RUN commands) can access environment variables from parent and grandparent processes. When run in a container in a CI/CD environment, environment variables may include sensitive information that was shared with the container in order to be used only by Buildah itself (e.g. container registry credentials). + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是Buildah程序的漏洞。 + - 漏洞发生的原因:在使用chroot隔离方式构建容器时,Buildah未能正确隔离环境变量,导致容器中的运行进程(例如Dockerfile中的RUN命令)可以访问到父进程或祖父进程的环境变量。 + - 效果:如果Buildah在CI/CD环境中运行,容器可能接收到一些敏感信息(如容器镜像仓库凭据),这些信息原本只应由Buildah自身使用。由于此漏洞,容器内的运行进程能够访问这些敏感环境变量,从而可能导致敏感信息泄露。 + +cve: ./data/2021/41xxx/CVE-2021-41089.json +Moby is an open-source project created by Docker to enable software containerization. A bug was found in Moby (Docker Engine) where attempting to copy files using `docker cp` into a specially-crafted container can result in Unix file permission changes for existing files in the host’s filesystem, widening access to others. This bug does not directly allow files to be read, modified, or executed without an additional cooperating process. This bug has been fixed in Moby (Docker Engine) 20.10.9. Users should update to this version as soon as possible. Running containers do not need to be restarted. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 
这是Docker Engine(Moby项目)的漏洞。该漏洞发生在使用`docker cp`命令将文件复制到经过特殊构造的容器时,可能导致主机文件系统中现有文件的Unix文件权限发生更改,从而使这些文件对其他用户开放访问权限。虽然此漏洞本身不会直接导致文件被读取、修改或执行,但它可能为攻击者提供更广泛的访问权限,结合其他漏洞或配合进程可能导致进一步的安全问题。 + +cve: ./data/2021/41xxx/CVE-2021-41091.json +Moby is an open-source project created by Docker to enable software containerization. A bug was found in Moby (Docker Engine) where the data directory (typically `/var/lib/docker`) contained subdirectories with insufficiently restricted permissions, allowing otherwise unprivileged Linux users to traverse directory contents and execute programs. When containers included executable programs with extended permission bits (such as `setuid`), unprivileged Linux users could discover and execute those programs. When the UID of an unprivileged Linux user on the host collided with the file owner or group inside a container, the unprivileged Linux user on the host could discover, read, and modify those files. This bug has been fixed in Moby (Docker Engine) 20.10.9. Users should update to this version as soon as possible. Running containers should be stopped and restarted for the permissions to be fixed. For users unable to upgrade limit access to the host to trusted users. Limit access to host volumes to trusted containers. + +analysis: ### 分析结果: + +1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,这个 CVE 与容器和隔离相关。 + +2. **漏洞所属程序及影响:** + - 这是 **Moby(Docker Engine)** 的漏洞。 + - 漏洞发生的原因是 `/var/lib/docker` 数据目录中的某些子目录权限设置不当,导致未授权的 Linux 用户可以遍历目录内容并执行其中的程序。 + - 如果容器中包含带有扩展权限位(如 `setuid`)的可执行程序,未授权用户可以发现并执行这些程序。 + - 当主机上的未授权用户 UID 与容器内文件的所有者或组 ID 冲突时,该用户还可以读取和修改容器内的文件。 + - 影响后果包括: + - 破坏容器隔离性。 + - 允许未授权用户访问和操作容器内的敏感文件或程序。 + - 可能导致进一步的权限提升或系统攻击。 + +3. **修复建议:** + - 升级到 Moby(Docker Engine)20.10.9 或更高版本。 + - 停止并重新启动所有运行中的容器以应用修复后的权限设置。 + - 如果无法升级,限制主机访问仅限可信用户,并确保主机卷仅挂载给可信容器。 + +cve: ./data/2021/41xxx/CVE-2021-41092.json +Docker CLI is the command line interface for the docker container runtime. 
A bug was found in the Docker CLI where running `docker login my-private-registry.example.com` with a misconfigured configuration file (typically `~/.docker/config.json`) listing a `credsStore` or `credHelpers` that could not be executed would result in any provided credentials being sent to `registry-1.docker.io` rather than the intended private registry. This bug has been fixed in Docker CLI 20.10.9. Users should update to this version as soon as possible. For users unable to update ensure that any configured credsStore or credHelpers entries in the configuration file reference an installed credential helper that is executable and on the PATH. + +analysis: 1. **是否与 namespace、cgroup、container 或容器隔离相关**: + 是的,这个 CVE 与容器相关,因为它涉及 Docker CLI,这是 Docker 容器运行时的一部分。 + +2. **漏洞分析**: + - **程序**:这是 Docker CLI 的漏洞,而不是内核 Kernel 或容器内部运行的应用。 + - **漏洞发生原因**:当用户尝试通过 `docker login` 登录到一个私有镜像仓库时,如果配置文件(通常是 `~/.docker/config.json`)中指定了无法执行的 `credsStore` 或 `credHelpers`,Docker CLI 会错误地将提供的凭据发送到默认的公共镜像仓库 `registry-1.docker.io`,而不是用户指定的私有镜像仓库。 + - **效果**:此漏洞可能导致用户的敏感凭据被意外发送到公共镜像仓库,从而引发潜在的安全风险,例如凭据泄露。这会影响容器镜像的拉取和推送操作的安全性。 + +cve: ./data/2021/41xxx/CVE-2021-41805.json +HashiCorp Consul Enterprise before 1.8.17, 1.9.x before 1.9.11, and 1.10.x before 1.10.4 has Incorrect Access Control. An ACL token (with the default operator:write permissions) in one namespace can be used for unintended privilege escalation in a different namespace. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 该CVE描述中提到“namespace”一词,且涉及跨命名空间的权限提升问题。因此,它与namespace相关,但未提及cgroup、container或容器隔离相关内容。 + +2. 
**程序漏洞分析**: + - **程序**:这是HashiCorp Consul Enterprise的漏洞,而不是内核Kernel、Docker或其他容器实现的漏洞。 + - **漏洞发生原因**:ACL token在某个命名空间中具有`operator:write`权限,但由于访问控制实现不正确,此token可以被用于其他命名空间,导致未预期的权限提升。 + - **效果**:攻击者可以通过滥用ACL token,在未经授权的情况下访问或操作其他命名空间中的资源,从而破坏命名空间之间的隔离性,并可能进一步影响服务的安全性和完整性。 + +cve: ./data/2021/42xxx/CVE-2021-42762.json +BubblewrapLauncher.cpp in WebKitGTK and WPE WebKit before 2.34.1 allows a limited sandbox bypass that allows a sandboxed process to trick host processes into thinking the sandboxed process is not confined by the sandbox, by abusing VFS syscalls that manipulate its filesystem namespace. The impact is limited to host services that create UNIX sockets that WebKit mounts inside its sandbox, and the sandboxed process remains otherwise confined. NOTE: this is similar to CVE-2021-41133. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,此 CVE 与 namespace 和隔离机制相关。它涉及通过滥用 VFS 系统调用操纵文件系统命名空间,从而绕过沙箱限制。 + +2. **程序漏洞分析**: + - **程序**:这是 WebKitGTK 和 WPE WebKit 的漏洞。 + - **漏洞发生原因**:漏洞存在于 BubblewrapLauncher.cpp 文件中,沙箱内的进程可以利用 VFS 系统调用(如 `mount` 或 `unshare`)来操作其文件系统命名空间,导致主机进程误认为该沙箱内进程未被限制。 + - **效果**:虽然沙箱内的进程仍然受到其他限制,但它可以欺骗主机服务,使这些服务相信它没有被沙箱限制。这种行为可能导致主机上创建的 UNIX 套接字被挂载到沙箱内,从而允许沙箱内进程访问这些套接字,进一步可能引发信息泄露或其他权限提升问题。 + +总结:此 CVE 与文件系统命名空间和沙箱隔离机制直接相关,影响 WebKitGTK 和 WPE WebKit 的沙箱实现。 + +cve: ./data/2021/43xxx/CVE-2021-43784.json +runc is a CLI tool for spawning and running containers on Linux according to the OCI specification. In runc, netlink is used internally as a serialization system for specifying the relevant container configuration to the `C` portion of the code (responsible for the based namespace setup of containers). 
In all versions of runc prior to 1.0.3, the encoder did not handle the possibility of an integer overflow in the 16-bit length field for the byte array attribute type, meaning that a large enough malicious byte array attribute could result in the length overflowing and the attribute contents being parsed as netlink messages for container configuration. This vulnerability requires the attacker to have some control over the configuration of the container and would allow the attacker to bypass the namespace restrictions of the container by simply adding their own netlink payload which disables all namespaces. The main users impacted are those who allow untrusted images with untrusted configurations to run on their machines (such as with shared cloud infrastructure). runc version 1.0.3 contains a fix for this bug. As a workaround, one may try disallowing untrusted namespace paths from your container. It should be noted that untrusted namespace paths would allow the attacker to disable namespace protections entirely even in the absence of this bug. + +analysis: ### 分析结果: + +1. **是否与namespace、cgroup、container或者容器、隔离相关:** + - 是的,该CVE与namespace、container和隔离机制密切相关。漏洞允许攻击者通过恶意配置绕过namespace限制,从而破坏容器的隔离性。 + +2. **程序的漏洞信息:** + - **程序名称:** runc(一个用于根据OCI规范在Linux上启动和运行容器的CLI工具)。 + - **漏洞类型:** 整数溢出漏洞。 + - **漏洞发生原因:** 在runc中,netlink被用作序列化系统来指定容器配置。然而,在处理字节数组属性时,编码器未能正确处理16位长度字段中的整数溢出问题。如果攻击者提供足够大的恶意字节数组属性,可能导致长度字段溢出,从而使属性内容被错误解析为netlink消息。 + - **漏洞效果:** 攻击者可以通过添加自定义的netlink负载来禁用所有namespace,从而完全绕过容器的隔离机制。这使得攻击者能够访问宿主机的资源或执行其他恶意操作。 + - **受影响用户:** 允许运行不可信镜像或不可信配置的用户(例如共享云基础设施的用户)。 + +3. **修复版本:** runc 1.0.3 已修复此漏洞。 + +### 总结: +该漏洞直接与容器的namespace隔离机制相关,攻击者可以通过恶意配置绕过namespace限制,破坏容器隔离性。 + +cve: ./data/2021/44xxx/CVE-2021-44719.json +Docker Desktop 4.3.0 has Incorrect Access Control. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 
这是Docker Desktop程序的漏洞。 + 漏洞发生的原因是Docker Desktop 4.3.0版本中存在错误的访问控制(Incorrect Access Control),这可能导致未经授权的用户或进程获得对容器或主机资源的不当访问权限。其效果可能包括突破容器隔离,访问或修改宿主机上的敏感数据,甚至执行任意代码。 + +cve: ./data/2021/44xxx/CVE-2021-44731.json +A race condition existed in the snapd 2.54.2 snap-confine binary when preparing a private mount namespace for a snap. This could allow a local attacker to gain root privileges by bind-mounting their own contents inside the snap's private mount namespace and causing snap-confine to execute arbitrary code and hence gain privilege escalation. Fixed in snapd versions 2.54.3+18.04, 2.54.3+20.04 and 2.54.3+21.10.1 + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与 namespace 相关。描述中明确提到 "private mount namespace",这是 Linux namespace 的一种,用于实现文件系统级别的隔离。 + +2. **漏洞分析:** + - **程序名称:** 这是 snapd 程序的漏洞,具体涉及其子组件 snap-confine。 + - **漏洞发生原因:** 在准备 snap 的私有挂载命名空间时,存在竞争条件(race condition)。攻击者可以利用这一时间窗口,通过绑定挂载(bind-mount)将自定义内容注入到 snap 的私有挂载命名空间中。 + - **漏洞效果:** 攻击者可以通过这种方式让 snap-confine 执行任意代码,从而获得 root 权限,导致本地提权(privilege escalation)。 + - **漏洞修复:** 已在 snapd 版本 2.54.3+18.04、2.54.3+20.04 和 2.54.3+21.10.1 中修复。 + +总结:这是一个与 Linux namespace(挂载命名空间)相关的漏洞,影响 snapd 程序,可能导致本地攻击者获得 root 权限。 + +cve: ./data/2021/46xxx/CVE-2021-46283.json +nf_tables_newset in net/netfilter/nf_tables_api.c in the Linux kernel before 5.12.13 allows local users to cause a denial of service (NULL pointer dereference and general protection fault) because of the missing initialization for nft_set_elem_expr_alloc. A local user can set a netfilter table expression in their own namespace. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,这个 CVE 与 namespace 相关。描述中提到 "A local user can set a netfilter table expression in their own namespace",表明漏洞涉及网络命名空间(network namespace),这是 Linux 容器隔离机制的一部分。 + +2. 
**程序及漏洞分析:** + - **程序:** 这是 Linux 内核(Kernel)中的漏洞。 + - **漏洞发生原因:** 在 `nf_tables_newset` 函数中,由于 `nft_set_elem_expr_alloc` 缺少初始化,导致在处理 netfilter 表达式时可能出现空指针解引用的问题。 + - **效果:** 攻击者可以通过在自己的网络命名空间中设置特定的 netfilter 表达式,触发空指针解引用,从而导致内核崩溃(general protection fault),造成拒绝服务(DoS)。 + +cve: ./data/2021/46xxx/CVE-2021-46912.json +In the Linux kernel, the following vulnerability has been resolved: + +net: Make tcp_allowed_congestion_control readonly in non-init netns + +Currently, tcp_allowed_congestion_control is global and writable; +writing to it in any net namespace will leak into all other net +namespaces. + +tcp_available_congestion_control and tcp_allowed_congestion_control are +the only sysctls in ipv4_net_table (the per-netns sysctl table) with a +NULL data pointer; their handlers (proc_tcp_available_congestion_control +and proc_allowed_congestion_control) have no other way of referencing a +struct net. Thus, they operate globally. + +Because ipv4_net_table does not use designated initializers, there is no +easy way to fix up this one "bad" table entry. However, the data pointer +updating logic shouldn't be applied to NULL pointers anyway, so we +instead force these entries to be read-only. + +These sysctls used to exist in ipv4_table (init-net only), but they were +moved to the per-net ipv4_net_table, presumably without realizing that +tcp_allowed_congestion_control was writable and thus introduced a leak. + +Because the intent of that commit was only to know (i.e. read) "which +congestion algorithms are available or allowed", this read-only solution +should be sufficient. + +The logic added in recent commit +31c4d2f160eb: ("net: Ensure net namespace isolation of sysctls") +does not and cannot check for NULL data pointers, because +other table entries (e.g. /proc/sys/net/netfilter/nf_log/) have +.data=NULL but use other methods (.extra2) to access the struct net. + +analysis: 1. 
**是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与namespace相关,具体涉及网络命名空间(net namespace)的隔离问题。 + +2. **程序漏洞分析**: + - **程序**:Linux内核(Kernel)。 + - **漏洞发生原因**:`tcp_allowed_congestion_control` 系统控制变量在所有网络命名空间中是全局可写的。这意味着,在任何网络命名空间中修改该变量的值,都会影响到其他网络命名空间中的值,从而破坏了网络命名空间之间的隔离性。 + - **效果**:攻击者可以通过在某个网络命名空间中写入 `tcp_allowed_congestion_control`,导致其他网络命名空间的拥塞控制算法配置被篡改。这可能会引发未预期的网络行为,例如性能下降或拒绝服务(DoS)。 + +总结:这是一个与网络命名空间隔离相关的 Linux 内核漏洞,可能导致跨命名空间的数据泄漏和配置篡改。 + +cve: ./data/2021/46xxx/CVE-2021-46936.json +In the Linux kernel, the following vulnerability has been resolved: + +net: fix use-after-free in tw_timer_handler + +A real world panic issue was found as follow in Linux 5.4. + + BUG: unable to handle page fault for address: ffffde49a863de28 + PGD 7e6fe62067 P4D 7e6fe62067 PUD 7e6fe63067 PMD f51e064067 PTE 0 + RIP: 0010:tw_timer_handler+0x20/0x40 + Call Trace: + + call_timer_fn+0x2b/0x120 + run_timer_softirq+0x1ef/0x450 + __do_softirq+0x10d/0x2b8 + irq_exit+0xc7/0xd0 + smp_apic_timer_interrupt+0x68/0x120 + apic_timer_interrupt+0xf/0x20 + +This issue was also reported since 2017 in the thread [1], +unfortunately, the issue was still can be reproduced after fixing +DCCP. + +The ipv4_mib_exit_net is called before tcp_sk_exit_batch when a net +namespace is destroyed since tcp_sk_ops is registered befrore +ipv4_mib_ops, which means tcp_sk_ops is in the front of ipv4_mib_ops +in the list of pernet_list. There will be a use-after-free on +net->mib.net_statistics in tw_timer_handler after ipv4_mib_exit_net +if there are some inflight time-wait timers. + +This bug is not introduced by commit f2bf415cfed7 ("mib: add net to +NET_ADD_STATS_BH") since the net_statistics is a global variable +instead of dynamic allocation and freeing. Actually, commit +61a7e26028b9 ("mib: put net statistics on struct net") introduces +the bug since it put net statistics on struct net and free it when +net namespace is destroyed. 
+ +Moving init_ipv4_mibs() to the front of tcp_init() to fix this bug +and replace pr_crit() with panic() since continuing is meaningless +when init_ipv4_mibs() fails. + +[1] https://groups.google.com/g/syzkaller/c/p1tn-_Kc6l4/m/smuL_FMAAgAJ?pli=1 + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该漏洞与namespace相关。具体来说,问题发生在销毁网络命名空间(net namespace)时,由于`tcp_sk_ops`和`ipv4_mib_ops`的注册顺序不同,导致在处理时间等待(time-wait)计时器时可能出现使用已释放内存的情况。 + +2. **这是什么程序的漏洞**: + 这是Linux内核(Kernel)的漏洞。漏洞发生在网络子系统中,特别是在处理网络命名空间销毁时的时间等待计时器逻辑。 + +3. **漏洞如何发生及其效果**: + - **漏洞发生原因**: + 在销毁网络命名空间时,`ipv4_mib_exit_net`会在`tcp_sk_exit_batch`之前被调用,这是因为`tcp_sk_ops`在`ipv4_mib_ops`之前注册,因此在`pernet_list`列表中排在前面。这会导致在网络命名空间销毁后,如果存在未完成的时间等待计时器,`tw_timer_handler`可能会访问已被释放的`net->mib.net_statistics`,从而引发使用已释放内存(use-after-free)的问题。 + + - **漏洞效果**: + 该漏洞可能导致内核崩溃(kernel panic),表现为无法处理页面错误(page fault)。这种崩溃可能会影响整个系统的稳定性,尤其是在涉及网络命名空间的操作中,例如在容器环境中创建或销毁网络命名空间时。 + +cve: ./data/2021/47xxx/CVE-2021-47010.json +In the Linux kernel, the following vulnerability has been resolved: + +net: Only allow init netns to set default tcp cong to a restricted algo + +tcp_set_default_congestion_control() is netns-safe in that it writes +to &net->ipv4.tcp_congestion_control, but it also sets +ca->flags |= TCP_CONG_NON_RESTRICTED which is not namespaced. +This has the unintended side-effect of changing the global +net.ipv4.tcp_allowed_congestion_control sysctl, despite the fact that it +is read-only: 97684f0970f6 ("net: Make tcp_allowed_congestion_control +readonly in non-init netns") + +Resolve this netns "leak" by only allowing the init netns to set the +default algorithm to one that is restricted. This restriction could be +removed if tcp_allowed_congestion_control were namespace-ified in the +future. + +This bug was uncovered with +https://github.com/JonathonReinhart/linux-netns-sysctl-verify + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个 CVE 与 namespace 相关。具体来说,它涉及网络命名空间(netns)的安全性问题。 + +2. 
**漏洞所属程序及分析**: + - 这是 Linux 内核(Kernel)中的一个漏洞。 + - 漏洞发生的原因在于 `tcp_set_default_congestion_control()` 函数中,虽然该函数写入了命名空间特定的变量 `&net->ipv4.tcp_congestion_control`,但它还设置了全局标志位 `ca->flags |= TCP_CONG_NON_RESTRICTED`,而这个标志位并未被正确地限制在命名空间内。 + - 结果导致在非初始命名空间中修改默认的 TCP 拥塞控制算法时,会意外地改变全局的 `net.ipv4.tcp_allowed_congestion_control` 系统参数,尽管该参数在非初始命名空间中应该是只读的。 + - 这种行为会导致跨命名空间的泄漏(leak),使得非初始命名空间可以影响全局的 TCP 拥塞控制配置,从而破坏隔离性。 + +3. **漏洞效果**: + - 攻击者可能利用此漏洞,在非初始网络命名空间中通过设置特定的 TCP 拥塞控制算法,间接影响全局的 TCP 拥塞控制策略。 + - 这种影响可能会波及其他使用相同内核的容器或进程,破坏容器之间的隔离性,甚至可能导致性能下降或其他未定义行为。 + +cve: ./data/2021/47xxx/CVE-2021-47011.json +In the Linux kernel, the following vulnerability has been resolved: + +mm: memcontrol: slab: fix obtain a reference to a freeing memcg + +Patch series "Use obj_cgroup APIs to charge kmem pages", v5. + +Since Roman's series "The new cgroup slab memory controller" applied. +All slab objects are charged with the new APIs of obj_cgroup. The new +APIs introduce a struct obj_cgroup to charge slab objects. It prevents +long-living objects from pinning the original memory cgroup in the +memory. But there are still some corner objects (e.g. allocations +larger than order-1 page on SLUB) which are not charged with the new +APIs. Those objects (include the pages which are allocated from buddy +allocator directly) are charged as kmem pages which still hold a +reference to the memory cgroup. + +E.g. We know that the kernel stack is charged as kmem pages because the +size of the kernel stack can be greater than 2 pages (e.g. 16KB on +x86_64 or arm64). If we create a thread (suppose the thread stack is +charged to memory cgroup A) and then move it from memory cgroup A to +memory cgroup B. Because the kernel stack of the thread hold a +reference to the memory cgroup A. The thread can pin the memory cgroup +A in the memory even if we remove the cgroup A. If we want to see this +scenario by using the following script. 
We can see that the system has +added 500 dying cgroups (This is not a real world issue, just a script +to show that the large kmallocs are charged as kmem pages which can pin +the memory cgroup in the memory). + + #!/bin/bash + + cat /proc/cgroups | grep memory + + cd /sys/fs/cgroup/memory + echo 1 > memory.move_charge_at_immigrate + + for i in range{1..500} + do + mkdir kmem_test + echo $$ > kmem_test/cgroup.procs + sleep 3600 & + echo $$ > cgroup.procs + echo `cat kmem_test/cgroup.procs` > cgroup.procs + rmdir kmem_test + done + + cat /proc/cgroups | grep memory + +This patchset aims to make those kmem pages to drop the reference to +memory cgroup by using the APIs of obj_cgroup. Finally, we can see that +the number of the dying cgroups will not increase if we run the above test +script. + +This patch (of 7): + +The rcu_read_lock/unlock only can guarantee that the memcg will not be +freed, but it cannot guarantee the success of css_get (which is in the +refill_stock when cached memcg changed) to memcg. + + rcu_read_lock() + memcg = obj_cgroup_memcg(old) + __memcg_kmem_uncharge(memcg) + refill_stock(memcg) + if (stock->cached != memcg) + // css_get can change the ref counter from 0 back to 1. + css_get(&memcg->css) + rcu_read_unlock() + +This fix is very like the commit: + + eefbfa7fd678 ("mm: memcg/slab: fix use after free in obj_cgroup_charge") + +Fix this by holding a reference to the memcg which is passed to the +__memcg_kmem_uncharge() before calling __memcg_kmem_uncharge(). + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与cgroup(控制组)直接相关。它涉及Linux内核中内存控制组(memory cgroup)的实现,特别是与slab分配器和对象cgroup(obj_cgroup)相关的内存管理问题。 + +2. 
**这是什么程序的漏洞**: + - **程序**:Linux内核(Kernel)。 + - **漏洞发生原因**:在处理slab分配器的对象时,某些内存分配(如大于order-1页面的SLUB分配或直接从伙伴分配器分配的页面)未正确使用新的obj_cgroup API进行计费。这导致这些分配仍然持有对旧内存控制组的引用,即使该控制组已被删除。这种引用可能导致被移除的内存控制组无法释放,从而引发资源泄漏或影响cgroup的正常生命周期管理。 + - **效果**:此漏洞可能导致内存控制组在被逻辑上移除后仍然被长期引用,进而阻止其资源的回收。这种情况可能在大量创建和销毁线程或cgroup的场景下显现,例如通过脚本测试时观察到有500个“dying cgroups”未能正确释放。这会间接影响基于cgroup实现的容器隔离机制,例如Docker或Kubernetes中的资源限制功能。 + +总结:该漏洞与cgroup密切相关,并且可能间接影响依赖cgroup实现资源隔离的容器技术。 + +cve: ./data/2021/47xxx/CVE-2021-47119.json +In the Linux kernel, the following vulnerability has been resolved: + +ext4: fix memory leak in ext4_fill_super + +Buffer head references must be released before calling kill_bdev(); +otherwise the buffer head (and its page referenced by b_data) will not +be freed by kill_bdev, and subsequently that bh will be leaked. + +If blocksizes differ, sb_set_blocksize() will kill current buffers and +page cache by using kill_bdev(). And then super block will be reread +again but using correct blocksize this time. sb_set_blocksize() didn't +fully free superblock page and buffer head, and being busy, they were +not freed and instead leaked. + +This can easily be reproduced by calling an infinite loop of: + + systemctl start .mount, and + systemctl stop .mount + +... since systemd creates a cgroup for each slice which it mounts, and +the bh leak get amplified by a dying memory cgroup that also never +gets freed, and memory consumption is much more easily noticed. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,此漏洞与cgroup相关。描述中提到系统通过`systemd`创建了一个cgroup来挂载文件系统,并且由于内存泄漏问题,当cgroup销毁时,相关的缓冲区头部(buffer head)和内存页无法被释放,导致内存消耗显著增加。 + +2. 
**程序漏洞分析**: + - **程序**:这是Linux内核(Kernel)中的漏洞,具体涉及`ext4`文件系统模块。 + - **漏洞发生原因**:在`sb_set_blocksize()`函数中,当块大小发生变化时,会调用`kill_bdev()`清除当前的缓冲区和页面缓存。然而,代码没有正确释放超级块(superblock)关联的缓冲区头部(buffer head)和内存页,导致内存泄漏。 + - **效果**:通过反复挂载和卸载`ext4`文件系统(例如通过`systemctl start`和`systemctl stop`命令),可以触发内存泄漏问题。由于`systemd`为每个挂载操作创建一个cgroup,当cgroup销毁时,未释放的内存会被进一步放大,最终可能导致系统内存耗尽。 + +cve: ./data/2021/47xxx/CVE-2021-47126.json +In the Linux kernel, the following vulnerability has been resolved: + +ipv6: Fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions + +Reported by syzbot: +HEAD commit: 90c911ad Merge tag 'fixes' of git://git.kernel.org/pub/scm.. +git tree: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master +dashboard link: https://syzkaller.appspot.com/bug?extid=123aa35098fd3c000eb7 +compiler: Debian clang version 11.0.1-2 + +================================================================== +BUG: KASAN: slab-out-of-bounds in fib6_nh_get_excptn_bucket net/ipv6/route.c:1604 [inline] +BUG: KASAN: slab-out-of-bounds in fib6_nh_flush_exceptions+0xbd/0x360 net/ipv6/route.c:1732 +Read of size 8 at addr ffff8880145c78f8 by task syz-executor.4/17760 + +CPU: 0 PID: 17760 Comm: syz-executor.4 Not tainted 5.12.0-rc8-syzkaller #0 +Call Trace: + + __dump_stack lib/dump_stack.c:79 [inline] + dump_stack+0x202/0x31e lib/dump_stack.c:120 + print_address_description+0x5f/0x3b0 mm/kasan/report.c:232 + __kasan_report mm/kasan/report.c:399 [inline] + kasan_report+0x15c/0x200 mm/kasan/report.c:416 + fib6_nh_get_excptn_bucket net/ipv6/route.c:1604 [inline] + fib6_nh_flush_exceptions+0xbd/0x360 net/ipv6/route.c:1732 + fib6_nh_release+0x9a/0x430 net/ipv6/route.c:3536 + fib6_info_destroy_rcu+0xcb/0x1c0 net/ipv6/ip6_fib.c:174 + rcu_do_batch kernel/rcu/tree.c:2559 [inline] + rcu_core+0x8f6/0x1450 kernel/rcu/tree.c:2794 + __do_softirq+0x372/0x7a6 kernel/softirq.c:345 + invoke_softirq kernel/softirq.c:221 [inline] + __irq_exit_rcu+0x22c/0x260 kernel/softirq.c:422 + irq_exit_rcu+0x5/0x20 
kernel/softirq.c:434 + sysvec_apic_timer_interrupt+0x91/0xb0 arch/x86/kernel/apic/apic.c:1100 + + asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:632 +RIP: 0010:lock_acquire+0x1f6/0x720 kernel/locking/lockdep.c:5515 +Code: f6 84 24 a1 00 00 00 02 0f 85 8d 02 00 00 f7 c3 00 02 00 00 49 bd 00 00 00 00 00 fc ff df 74 01 fb 48 c7 44 24 40 0e 36 e0 45 <4b> c7 44 3d 00 00 00 00 00 4b c7 44 3d 09 00 00 00 00 43 c7 44 3d +RSP: 0018:ffffc90009e06560 EFLAGS: 00000206 +RAX: 1ffff920013c0cc0 RBX: 0000000000000246 RCX: dffffc0000000000 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 +RBP: ffffc90009e066e0 R08: dffffc0000000000 R09: fffffbfff1f992b1 +R10: fffffbfff1f992b1 R11: 0000000000000000 R12: 0000000000000000 +R13: dffffc0000000000 R14: 0000000000000000 R15: 1ffff920013c0cb4 + rcu_lock_acquire+0x2a/0x30 include/linux/rcupdate.h:267 + rcu_read_lock include/linux/rcupdate.h:656 [inline] + ext4_get_group_info+0xea/0x340 fs/ext4/ext4.h:3231 + ext4_mb_prefetch+0x123/0x5d0 fs/ext4/mballoc.c:2212 + ext4_mb_regular_allocator+0x8a5/0x28f0 fs/ext4/mballoc.c:2379 + ext4_mb_new_blocks+0xc6e/0x24f0 fs/ext4/mballoc.c:4982 + ext4_ext_map_blocks+0x2be3/0x7210 fs/ext4/extents.c:4238 + ext4_map_blocks+0xab3/0x1cb0 fs/ext4/inode.c:638 + ext4_getblk+0x187/0x6c0 fs/ext4/inode.c:848 + ext4_bread+0x2a/0x1c0 fs/ext4/inode.c:900 + ext4_append+0x1a4/0x360 fs/ext4/namei.c:67 + ext4_init_new_dir+0x337/0xa10 fs/ext4/namei.c:2768 + ext4_mkdir+0x4b8/0xc00 fs/ext4/namei.c:2814 + vfs_mkdir+0x45b/0x640 fs/namei.c:3819 + ovl_do_mkdir fs/overlayfs/overlayfs.h:161 [inline] + ovl_mkdir_real+0x53/0x1a0 fs/overlayfs/dir.c:146 + ovl_create_real+0x280/0x490 fs/overlayfs/dir.c:193 + ovl_workdir_create+0x425/0x600 fs/overlayfs/super.c:788 + ovl_make_workdir+0xed/0x1140 fs/overlayfs/super.c:1355 + ovl_get_workdir fs/overlayfs/super.c:1492 [inline] + ovl_fill_super+0x39ee/0x5370 fs/overlayfs/super.c:2035 + mount_nodev+0x52/0xe0 fs/super.c:1413 + legacy_get_tree+0xea/0x180 
fs/fs_context.c:592 + vfs_get_tree+0x86/0x270 fs/super.c:1497 + do_new_mount fs/namespace.c:2903 [inline] + path_mount+0x196f/0x2be0 fs/namespace.c:3233 + do_mount fs/namespace.c:3246 [inline] + __do_sys_mount fs/namespace.c:3454 [inline] + __se_sys_mount+0x2f9/0x3b0 fs/namespace.c:3431 + do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x44/0xae +RIP: 0033:0x4665f9 +Code: ff ff c3 66 2e 0f 1f 84 +---truncated--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. **这是什么程序的漏洞**: + 这是一个 Linux 内核 (Kernel) 的漏洞。漏洞发生在 IPv6 路由模块中,具体是 `fib6_nh_flush_exceptions` 函数中的越界读取问题。该漏洞是由 KASAN(Kernel Address Sanitizer)检测到的,表明在处理 IPv6 路由异常时,内核尝试访问了一个超出分配内存范围的地址。 + +3. **漏洞如何发生及效果**: + 漏洞发生在内核处理 IPv6 路由表的异常缓存清理过程中。由于对内存边界检查不足,可能导致内核读取未分配或非法的内存区域,从而引发以下潜在后果: + - 系统崩溃(内核 panic)。 + - 信息泄露:攻击者可能利用此漏洞读取内核内存中的敏感数据。 + - 潜在的提权风险:如果结合其他漏洞,可能进一步导致权限提升。 + +总结:这是一个内核级别的漏洞,与容器或隔离机制无关。 + +cve: ./data/2021/47xxx/CVE-2021-47136.json +In the Linux kernel, the following vulnerability has been resolved: + +net: zero-initialize tc skb extension on allocation + +Function skb_ext_add() doesn't initialize created skb extension with any +value and leaves it up to the user. However, since extension of type +TC_SKB_EXT originally contained only single value tc_skb_ext->chain its +users used to just assign the chain value without setting whole extension +memory to zero first. This assumption changed when TC_SKB_EXT extension was +extended with additional fields but not all users were updated to +initialize the new fields which leads to use of uninitialized memory +afterwards. 
UBSAN log: + +[ 778.299821] UBSAN: invalid-load in net/openvswitch/flow.c:899:28 +[ 778.301495] load of value 107 is not a valid value for type '_Bool' +[ 778.303215] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc7+ #2 +[ 778.304933] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +[ 778.307901] Call Trace: +[ 778.308680] +[ 778.309358] dump_stack+0xbb/0x107 +[ 778.310307] ubsan_epilogue+0x5/0x40 +[ 778.311167] __ubsan_handle_load_invalid_value.cold+0x43/0x48 +[ 778.312454] ? memset+0x20/0x40 +[ 778.313230] ovs_flow_key_extract.cold+0xf/0x14 [openvswitch] +[ 778.314532] ovs_vport_receive+0x19e/0x2e0 [openvswitch] +[ 778.315749] ? ovs_vport_find_upcall_portid+0x330/0x330 [openvswitch] +[ 778.317188] ? create_prof_cpu_mask+0x20/0x20 +[ 778.318220] ? arch_stack_walk+0x82/0xf0 +[ 778.319153] ? secondary_startup_64_no_verify+0xb0/0xbb +[ 778.320399] ? stack_trace_save+0x91/0xc0 +[ 778.321362] ? stack_trace_consume_entry+0x160/0x160 +[ 778.322517] ? lock_release+0x52e/0x760 +[ 778.323444] netdev_frame_hook+0x323/0x610 [openvswitch] +[ 778.324668] ? ovs_netdev_get_vport+0xe0/0xe0 [openvswitch] +[ 778.325950] __netif_receive_skb_core+0x771/0x2db0 +[ 778.327067] ? lock_downgrade+0x6e0/0x6f0 +[ 778.328021] ? lock_acquire+0x565/0x720 +[ 778.328940] ? generic_xdp_tx+0x4f0/0x4f0 +[ 778.329902] ? inet_gro_receive+0x2a7/0x10a0 +[ 778.330914] ? lock_downgrade+0x6f0/0x6f0 +[ 778.331867] ? udp4_gro_receive+0x4c4/0x13e0 +[ 778.332876] ? lock_release+0x52e/0x760 +[ 778.333808] ? dev_gro_receive+0xcc8/0x2380 +[ 778.334810] ? lock_downgrade+0x6f0/0x6f0 +[ 778.335769] __netif_receive_skb_list_core+0x295/0x820 +[ 778.336955] ? process_backlog+0x780/0x780 +[ 778.337941] ? mlx5e_rep_tc_netdevice_event_unregister+0x20/0x20 [mlx5_core] +[ 778.339613] ? seqcount_lockdep_reader_access.constprop.0+0xa7/0xc0 +[ 778.341033] ? 
kvm_clock_get_cycles+0x14/0x20 +[ 778.342072] netif_receive_skb_list_internal+0x5f5/0xcb0 +[ 778.343288] ? __kasan_kmalloc+0x7a/0x90 +[ 778.344234] ? mlx5e_handle_rx_cqe_mpwrq+0x9e0/0x9e0 [mlx5_core] +[ 778.345676] ? mlx5e_xmit_xdp_frame_mpwqe+0x14d0/0x14d0 [mlx5_core] +[ 778.347140] ? __netif_receive_skb_list_core+0x820/0x820 +[ 778.348351] ? mlx5e_post_rx_mpwqes+0xa6/0x25d0 [mlx5_core] +[ 778.349688] ? napi_gro_flush+0x26c/0x3c0 +[ 778.350641] napi_complete_done+0x188/0x6b0 +[ 778.351627] mlx5e_napi_poll+0x373/0x1b80 [mlx5_core] +[ 778.352853] __napi_poll+0x9f/0x510 +[ 778.353704] ? mlx5_flow_namespace_set_mode+0x260/0x260 [mlx5_core] +[ 778.355158] net_rx_action+0x34c/0xa40 +[ 778.356060] ? napi_threaded_poll+0x3d0/0x3d0 +[ 778.357083] ? sched_clock_cpu+0x18/0x190 +[ 778.358041] ? __common_interrupt+0x8e/0x1a0 +[ 778.359045] __do_softirq+0x1ce/0x984 +[ 778.359938] __irq_exit_rcu+0x137/0x1d0 +[ 778.360865] irq_exit_rcu+0xa/0x20 +[ 778.361708] common_interrupt+0x80/0xa0 +[ 778.362640] +[ 778.363212] asm_common_interrupt+0x1e/0x40 +[ 778.364204] RIP: 0010:native_safe_halt+0xe/0x10 +[ 778.365273] Code: 4f ff ff ff 4c 89 e7 e8 50 3f 40 fe e9 dc fe ff ff 48 89 df e8 43 3f 40 fe eb 90 cc e9 07 00 00 00 0f 00 2d 74 05 62 00 fb f4 90 e9 07 00 00 00 0f 00 2d 64 05 62 00 f4 c3 cc cc 0f 1f 44 00 +[ 778.369355] RSP: 0018:ffffffff84407e48 EFLAGS: 00000246 +[ 778.370570] RAX +---truncated--- + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + N/A + +2. **这是什么程序的漏洞,如何发生,有何效果**: + - **程序**:Linux 内核 (Kernel) + - **漏洞原因**:`skb_ext_add()` 函数在分配 `tc skb extension` 时未初始化内存,导致扩展字段中新增的部分未被正确初始化。这使得后续代码可能使用未初始化的内存,从而引发未定义行为。 + - **效果**:该漏洞可能导致内核崩溃或信息泄漏。具体表现为 UBSAN 检测到非法值加载 (`invalid-load`),例如在 Open vSwitch 模块中处理流量时可能触发错误路径。 + +3. 
**结论**:由于此 CVE 仅涉及内核网络子系统的内存初始化问题,并未直接与 namespace、cgroup、container 或隔离机制相关,因此返回 "N/A" 即可。 + +cve: ./data/2021/47xxx/CVE-2021-47335.json +In the Linux kernel, the following vulnerability has been resolved: + +f2fs: fix to avoid racing on fsync_entry_slab by multi filesystem instances + +As syzbot reported, there is an use-after-free issue during f2fs recovery: + +Use-after-free write at 0xffff88823bc16040 (in kfence-#10): + kmem_cache_destroy+0x1f/0x120 mm/slab_common.c:486 + f2fs_recover_fsync_data+0x75b0/0x8380 fs/f2fs/recovery.c:869 + f2fs_fill_super+0x9393/0xa420 fs/f2fs/super.c:3945 + mount_bdev+0x26c/0x3a0 fs/super.c:1367 + legacy_get_tree+0xea/0x180 fs/fs_context.c:592 + vfs_get_tree+0x86/0x270 fs/super.c:1497 + do_new_mount fs/namespace.c:2905 [inline] + path_mount+0x196f/0x2be0 fs/namespace.c:3235 + do_mount fs/namespace.c:3248 [inline] + __do_sys_mount fs/namespace.c:3456 [inline] + __se_sys_mount+0x2f9/0x3b0 fs/namespace.c:3433 + do_syscall_64+0x3f/0xb0 arch/x86/entry/common.c:47 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +The root cause is multi f2fs filesystem instances can race on accessing +global fsync_entry_slab pointer, result in use-after-free issue of slab +cache, fixes to init/destroy this slab cache only once during module +init/destroy procedure to avoid this issue. + +analysis: 1. **分析这个CVE信息是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. 
**这是什么程序的漏洞,是内核Kernel,还是容器实现Docker,还是容器内部运行的应用,同时告诉我这个漏洞如何发生,它有何效果**: + N/A + +cve: ./data/2021/47xxx/CVE-2021-47379.json +In the Linux kernel, the following vulnerability has been resolved: + +blk-cgroup: fix UAF by grabbing blkcg lock before destroying blkg pd + +KASAN reports a use-after-free report when doing fuzz test: + +[693354.104835] ================================================================== +[693354.105094] BUG: KASAN: use-after-free in bfq_io_set_weight_legacy+0xd3/0x160 +[693354.105336] Read of size 4 at addr ffff888be0a35664 by task sh/1453338 + +[693354.105607] CPU: 41 PID: 1453338 Comm: sh Kdump: loaded Not tainted 4.18.0-147 +[693354.105610] Hardware name: Huawei 2288H V5/BC11SPSCB0, BIOS 0.81 07/02/2018 +[693354.105612] Call Trace: +[693354.105621] dump_stack+0xf1/0x19b +[693354.105626] ? show_regs_print_info+0x5/0x5 +[693354.105634] ? printk+0x9c/0xc3 +[693354.105638] ? cpumask_weight+0x1f/0x1f +[693354.105648] print_address_description+0x70/0x360 +[693354.105654] kasan_report+0x1b2/0x330 +[693354.105659] ? bfq_io_set_weight_legacy+0xd3/0x160 +[693354.105665] ? bfq_io_set_weight_legacy+0xd3/0x160 +[693354.105670] bfq_io_set_weight_legacy+0xd3/0x160 +[693354.105675] ? bfq_cpd_init+0x20/0x20 +[693354.105683] cgroup_file_write+0x3aa/0x510 +[693354.105693] ? ___slab_alloc+0x507/0x540 +[693354.105698] ? cgroup_file_poll+0x60/0x60 +[693354.105702] ? 0xffffffff89600000 +[693354.105708] ? usercopy_abort+0x90/0x90 +[693354.105716] ? mutex_lock+0xef/0x180 +[693354.105726] kernfs_fop_write+0x1ab/0x280 +[693354.105732] ? cgroup_file_poll+0x60/0x60 +[693354.105738] vfs_write+0xe7/0x230 +[693354.105744] ksys_write+0xb0/0x140 +[693354.105749] ? __ia32_sys_read+0x50/0x50 +[693354.105760] do_syscall_64+0x112/0x370 +[693354.105766] ? syscall_return_slowpath+0x260/0x260 +[693354.105772] ? do_page_fault+0x9b/0x270 +[693354.105779] ? prepare_exit_to_usermode+0xf9/0x1a0 +[693354.105784] ? 
enter_from_user_mode+0x30/0x30 +[693354.105793] entry_SYSCALL_64_after_hwframe+0x65/0xca + +[693354.105875] Allocated by task 1453337: +[693354.106001] kasan_kmalloc+0xa0/0xd0 +[693354.106006] kmem_cache_alloc_node_trace+0x108/0x220 +[693354.106010] bfq_pd_alloc+0x96/0x120 +[693354.106015] blkcg_activate_policy+0x1b7/0x2b0 +[693354.106020] bfq_create_group_hierarchy+0x1e/0x80 +[693354.106026] bfq_init_queue+0x678/0x8c0 +[693354.106031] blk_mq_init_sched+0x1f8/0x460 +[693354.106037] elevator_switch_mq+0xe1/0x240 +[693354.106041] elevator_switch+0x25/0x40 +[693354.106045] elv_iosched_store+0x1a1/0x230 +[693354.106049] queue_attr_store+0x78/0xb0 +[693354.106053] kernfs_fop_write+0x1ab/0x280 +[693354.106056] vfs_write+0xe7/0x230 +[693354.106060] ksys_write+0xb0/0x140 +[693354.106064] do_syscall_64+0x112/0x370 +[693354.106069] entry_SYSCALL_64_after_hwframe+0x65/0xca + +[693354.106114] Freed by task 1453336: +[693354.106225] __kasan_slab_free+0x130/0x180 +[693354.106229] kfree+0x90/0x1b0 +[693354.106233] blkcg_deactivate_policy+0x12c/0x220 +[693354.106238] bfq_exit_queue+0xf5/0x110 +[693354.106241] blk_mq_exit_sched+0x104/0x130 +[693354.106245] __elevator_exit+0x45/0x60 +[693354.106249] elevator_switch_mq+0xd6/0x240 +[693354.106253] elevator_switch+0x25/0x40 +[693354.106257] elv_iosched_store+0x1a1/0x230 +[693354.106261] queue_attr_store+0x78/0xb0 +[693354.106264] kernfs_fop_write+0x1ab/0x280 +[693354.106268] vfs_write+0xe7/0x230 +[693354.106271] ksys_write+0xb0/0x140 +[693354.106275] do_syscall_64+0x112/0x370 +[693354.106280] entry_SYSCALL_64_after_hwframe+0x65/0xca + +[693354.106329] The buggy address belongs to the object at ffff888be0a35580 + which belongs to the cache kmalloc-1k of size 1024 +[693354.106736] The buggy address is located 228 bytes inside of + 1024-byte region [ffff888be0a35580, ffff888be0a35980) +[693354.107114] The buggy address belongs to the page: +[693354.107273] page:ffffea002f828c00 count:1 mapcount:0 mapping:ffff888107c17080 index:0x0 
compound_mapcount: 0 +[693354.107606] flags: 0x17ffffc0008100(slab|head) +[693354.107760] raw: 0017ffffc0008100 ffffea002fcbc808 ffffea0030bd3a08 ffff888107c17080 +[693354.108020] r +---truncated--- + +analysis: 1. 这个CVE信息与cgroup相关(不涉及namespace),因为它涉及到`blk-cgroup`的使用。 +2. 这是Linux内核的漏洞。漏洞发生在块设备控制组(blk-cgroup)的实现中,具体是在销毁`blkg policy data (pd)`时未正确获取`blkcg lock`,导致释放后使用(Use-After-Free, UAF)问题。此漏洞可能允许攻击者通过恶意操作控制组文件系统(cgroup fs)触发内存损坏,进而可能导致系统崩溃或权限提升。 + +cve: ./data/2021/47xxx/CVE-2021-47408.json +In the Linux kernel, the following vulnerability has been resolved: + +netfilter: conntrack: serialize hash resizes and cleanups + +Syzbot was able to trigger the following warning [1] + +No repro found by syzbot yet but I was able to trigger similar issue +by having 2 scripts running in parallel, changing conntrack hash sizes, +and: + +for j in `seq 1 1000` ; do unshare -n /bin/true >/dev/null ; done + +It would take more than 5 minutes for net_namespace structures +to be cleaned up. + +This is because nf_ct_iterate_cleanup() has to restart everytime +a resize happened. + +By adding a mutex, we can serialize hash resizes and cleanups +and also make get_next_corpse() faster by skipping over empty +buckets. + +Even without resizes in the picture, this patch considerably +speeds up network namespace dismantles. + +[1] +INFO: task syz-executor.0:8312 can't die for more than 144 seconds. 
+task:syz-executor.0 state:R running task stack:25672 pid: 8312 ppid: 6573 flags:0x00004006 +Call Trace: + context_switch kernel/sched/core.c:4955 [inline] + __schedule+0x940/0x26f0 kernel/sched/core.c:6236 + preempt_schedule_common+0x45/0xc0 kernel/sched/core.c:6408 + preempt_schedule_thunk+0x16/0x18 arch/x86/entry/thunk_64.S:35 + __local_bh_enable_ip+0x109/0x120 kernel/softirq.c:390 + local_bh_enable include/linux/bottom_half.h:32 [inline] + get_next_corpse net/netfilter/nf_conntrack_core.c:2252 [inline] + nf_ct_iterate_cleanup+0x15a/0x450 net/netfilter/nf_conntrack_core.c:2275 + nf_conntrack_cleanup_net_list+0x14c/0x4f0 net/netfilter/nf_conntrack_core.c:2469 + ops_exit_list+0x10d/0x160 net/core/net_namespace.c:171 + setup_net+0x639/0xa30 net/core/net_namespace.c:349 + copy_net_ns+0x319/0x760 net/core/net_namespace.c:470 + create_new_namespaces+0x3f6/0xb20 kernel/nsproxy.c:110 + unshare_nsproxy_namespaces+0xc1/0x1f0 kernel/nsproxy.c:226 + ksys_unshare+0x445/0x920 kernel/fork.c:3128 + __do_sys_unshare kernel/fork.c:3202 [inline] + __se_sys_unshare kernel/fork.c:3200 [inline] + __x64_sys_unshare+0x2d/0x40 kernel/fork.c:3200 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae +RIP: 0033:0x7f63da68e739 +RSP: 002b:00007f63d7c05188 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 +RAX: ffffffffffffffda RBX: 00007f63da792f80 RCX: 00007f63da68e739 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000040000000 +RBP: 00007f63da6e8cc4 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00007f63da792f80 +R13: 00007fff50b75d3f R14: 00007f63d7c05300 R15: 0000000000022000 + +Showing all locks held in the system: +1 lock held by khungtaskd/27: + #0: ffffffff8b980020 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6446 +2 locks held by kworker/u4:2/153: + #0: ffff888010c69138 
((wq_completion)events_unbound){+.+.}-{0:0}, at: arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline] + #0: ffff888010c69138 ((wq_completion)events_unbound){+.+.}-{0:0}, at: arch_atomic_long_set include/linux/atomic/atomic-long.h:41 [inline] + #0: ffff888010c69138 ((wq_completion)events_unbound){+.+.}-{0:0}, at: atomic_long_set include/linux/atomic/atomic-instrumented.h:1198 [inline] + #0: ffff888010c69138 ((wq_completion)events_unbound){+.+.}-{0:0}, at: set_work_data kernel/workqueue.c:634 [inline] + #0: ffff888010c69138 ((wq_completion)events_unbound){+.+.}-{0:0}, at: set_work_pool_and_clear_pending kernel/workqueue.c:661 [inline] + #0: ffff888010c69138 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x896/0x1690 kernel/workqueue.c:2268 + #1: ffffc9000140fdb0 ((kfence_timer).work){+.+.}-{0:0}, at: process_one_work+0x8ca/0x1690 kernel/workqueue.c:2272 +1 lock held by systemd-udevd/2970: +1 lock held by in:imklog/6258: + #0: ffff88807f970ff0 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 fs/file.c:990 +3 locks held by kworker/1:6/8158: +1 lock held by syz-executor.0/8312: +2 locks held by kworker/u4:13/9320: +1 lock held by +---truncated--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个CVE与namespace相关。具体来说,它涉及网络命名空间(net_namespace)的清理问题,而网络命名空间是Linux容器(如Docker)实现网络隔离的核心技术之一。 + +2. **这是什么程序的漏洞**: + 这是Linux内核(Kernel)中的漏洞。漏洞发生在`nf_conntrack_core.c`模块中,该模块负责连接跟踪(connection tracking),主要用于防火墙和NAT功能。当网络命名空间被销毁时,由于`nf_ct_iterate_cleanup()`函数在处理连接跟踪哈希表调整时需要重新启动,导致清理过程变慢甚至阻塞任务。 + +3. 
**漏洞如何发生及其效果**: + - 漏洞发生的原因是,在调整连接跟踪哈希表大小的同时,如果网络命名空间被销毁,`nf_ct_iterate_cleanup()`会因为哈希表调整而不断重新启动,从而显著延迟网络命名空间的清理过程。 + - 效果上,这会导致某些任务(如`syz-executor`)长时间无法退出,表现为系统性能下降或任务挂起。在容器环境中,这可能影响容器的快速创建和销毁,尤其是当频繁调整连接跟踪哈希大小时。 + +cve: ./data/2021/47xxx/CVE-2021-47579.json +In the Linux kernel, the following vulnerability has been resolved: + +ovl: fix warning in ovl_create_real() + +Syzbot triggered the following warning in ovl_workdir_create() -> +ovl_create_real(): + + if (!err && WARN_ON(!newdentry->d_inode)) { + +The reason is that the cgroup2 filesystem returns from mkdir without +instantiating the new dentry. + +Weird filesystems such as this will be rejected by overlayfs at a later +stage during setup, but to prevent such a warning, call ovl_mkdir_real() +directly from ovl_workdir_create() and reject this case early. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是Linux内核的漏洞。 + - 漏洞发生的原因:在overlayfs(一种用于容器和文件系统合并的机制)中,当处理cgroup2文件系统时,`mkdir`操作没有正确实例化新的dentry,导致`ovl_create_real()`函数中触发了一个警告(WARN_ON)。 + - 漏洞效果:虽然这个问题不会直接导致系统崩溃或安全问题,但它会生成警告信息,并可能暴露内核对某些特殊文件系统的不兼容性。这可能间接影响依赖overlayfs的容器运行时(如Docker或Kubernetes中的容器),尤其是在使用cgroup2进行资源隔离时。通过修复,内核会在早期阶段拒绝这种不兼容的情况,避免警告的产生。 + +cve: ./data/2021/47xxx/CVE-2021-47584.json +In the Linux kernel, the following vulnerability has been resolved: + +iocost: Fix divide-by-zero on donation from low hweight cgroup + +The donation calculation logic assumes that the donor has non-zero +after-donation hweight, so the lowest active hweight a donating cgroup can +have is 2 so that it can donate 1 while keeping the other 1 for itself. +Earlier, we only donated from cgroups with sizable surpluses so this +condition was always true. However, with the precise donation algorithm +implemented, f1de2439ec43 ("blk-iocost: revamp donation amount +determination") made the donation amount calculation exact enabling even low +hweight cgroups to donate. 
+ +This means that in rare occasions, a cgroup with active hweight of 1 can +enter donation calculation triggering the following warning and then a +divide-by-zero oops. + + WARNING: CPU: 4 PID: 0 at block/blk-iocost.c:1928 transfer_surpluses.cold+0x0/0x53 [884/94867] + ... + RIP: 0010:transfer_surpluses.cold+0x0/0x53 + Code: 92 ff 48 c7 c7 28 d1 ab b5 65 48 8b 34 25 00 ae 01 00 48 81 c6 90 06 00 00 e8 8b 3f fe ff 48 c7 c0 ea ff ff ff e9 95 ff 92 ff <0f> 0b 48 c7 c7 30 da ab b5 e8 71 3f fe ff 4c 89 e8 4d 85 ed 74 0 +4 + ... + Call Trace: + + ioc_timer_fn+0x1043/0x1390 + call_timer_fn+0xa1/0x2c0 + __run_timers.part.0+0x1ec/0x2e0 + run_timer_softirq+0x35/0x70 + ... + iocg: invalid donation weights in /a/b: active=1 donating=1 after=0 + +Fix it by excluding cgroups w/ active hweight < 2 from donating. Excluding +these extreme low hweight donations shouldn't affect work conservation in +any meaningful way. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,此CVE与cgroup相关。它涉及Linux内核中块设备I/O成本控制(blk-iocost)的捐赠逻辑,该逻辑是cgroup v2的一部分,用于管理I/O资源分配。 + +2. **这是什么程序的漏洞**: + - **程序**:Linux内核(Kernel)。 + - **漏洞发生原因**:在块设备I/O成本控制(blk-iocost)模块中,捐赠逻辑假设捐赠方的活动权重(hweight)大于零,但在某些情况下,具有极低活动权重(如1)的cgroup可能会尝试进行捐赠,导致除以零的错误。 + - **效果**:此漏洞可能导致内核崩溃(divide-by-zero oops),从而影响系统的稳定性和可用性。 + +3. **总结**:这是一个与cgroup相关的Linux内核漏洞,发生在块设备I/O成本控制模块中,可能导致系统崩溃。 + +cve: ./data/2021/47xxx/CVE-2021-47588.json +In the Linux kernel, the following vulnerability has been resolved: + +sit: do not call ipip6_dev_free() from sit_init_net() + +ipip6_dev_free is sit dev->priv_destructor, already called +by register_netdevice() if something goes wrong. + +Alternative would be to make ipip6_dev_free() robust against +multiple invocations, but other drivers do not implement this +strategy. 
+ +syzbot reported: + +dst_release underflow +WARNING: CPU: 0 PID: 5059 at net/core/dst.c:173 dst_release+0xd8/0xe0 net/core/dst.c:173 +Modules linked in: +CPU: 1 PID: 5059 Comm: syz-executor.4 Not tainted 5.16.0-rc5-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:dst_release+0xd8/0xe0 net/core/dst.c:173 +Code: 4c 89 f2 89 d9 31 c0 5b 41 5e 5d e9 da d5 44 f9 e8 1d 90 5f f9 c6 05 87 48 c6 05 01 48 c7 c7 80 44 99 8b 31 c0 e8 e8 67 29 f9 <0f> 0b eb 85 0f 1f 40 00 53 48 89 fb e8 f7 8f 5f f9 48 83 c3 a8 48 +RSP: 0018:ffffc9000aa5faa0 EFLAGS: 00010246 +RAX: d6894a925dd15a00 RBX: 00000000ffffffff RCX: 0000000000040000 +RDX: ffffc90005e19000 RSI: 000000000003ffff RDI: 0000000000040000 +RBP: 0000000000000000 R08: ffffffff816a1f42 R09: ffffed1017344f2c +R10: ffffed1017344f2c R11: 0000000000000000 R12: 0000607f462b1358 +R13: 1ffffffff1bfd305 R14: ffffe8ffffcb1358 R15: dffffc0000000000 +FS: 00007f66c71a2700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f88aaed5058 CR3: 0000000023e0f000 CR4: 00000000003506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + dst_cache_destroy+0x107/0x1e0 net/core/dst_cache.c:160 + ipip6_dev_free net/ipv6/sit.c:1414 [inline] + sit_init_net+0x229/0x550 net/ipv6/sit.c:1936 + ops_init+0x313/0x430 net/core/net_namespace.c:140 + setup_net+0x35b/0x9d0 net/core/net_namespace.c:326 + copy_net_ns+0x359/0x5c0 net/core/net_namespace.c:470 + create_new_namespaces+0x4ce/0xa00 kernel/nsproxy.c:110 + unshare_nsproxy_namespaces+0x11e/0x180 kernel/nsproxy.c:226 + ksys_unshare+0x57d/0xb50 kernel/fork.c:3075 + __do_sys_unshare kernel/fork.c:3146 [inline] + __se_sys_unshare kernel/fork.c:3144 [inline] + __x64_sys_unshare+0x34/0x40 kernel/fork.c:3144 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x44/0xd0 
arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae +RIP: 0033:0x7f66c882ce99 +Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007f66c71a2168 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 +RAX: ffffffffffffffda RBX: 00007f66c893ff60 RCX: 00007f66c882ce99 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000048040200 +RBP: 00007f66c8886ff1 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 00007fff6634832f R14: 00007f66c71a2300 R15: 0000000000022000 + + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: +   是的,这个 CVE 与 namespace 相关。具体来说,问题出现在 `unshare` 系统调用中,该调用允许进程创建新的命名空间(namespaces),而命名空间是 Linux 容器(如 Docker)实现隔离的核心技术之一。 + +2. **这是什么程序的漏洞**: +   这是 **Linux 内核(Kernel)** 的漏洞。漏洞发生在内核处理 IPv6-in-IPv4 隧道设备(SIT,Simple Internet Transition)初始化时,错误地调用了 `ipip6_dev_free()` 函数,导致 `dst_release` 发生 underflow(下溢)。此问题由 `syzbot` 报告,并通过模糊测试发现。 + +3. **漏洞如何发生及其效果**: +   - 漏洞发生在 SIT 设备的初始化函数 `sit_init_net()` 中,错误地调用了 `ipip6_dev_free()`,而该函数作为 sit 设备的 `dev->priv_destructor`,在 `register_netdevice()` 失败时已经被调用过,因此导致了重复释放的问题。 +   - 当用户通过 `unshare` 系统调用尝试创建新的网络命名空间时,触发了这一问题,最终导致内核崩溃(kernel panic)或不稳定。 +   - 此漏洞可能允许攻击者通过精心构造的操作(例如特定的网络配置或命名空间操作)导致系统崩溃,从而造成拒绝服务(Denial of Service, DoS)攻击。 + +cve: ./data/2021/4xxx/CVE-2021-4154.json +A use-after-free flaw was found in cgroup1_parse_param in kernel/cgroup/cgroup-v1.c in the Linux kernel's cgroup v1 parser. A local attacker with a user privilege could cause a privilege escalation by exploiting the fsconfig syscall parameter leading to a container breakout and a denial of service on the system. + +analysis: ### 分析结果: + +1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: +   是的,该 CVE 与 cgroup(控制组)直接相关,同时也涉及容器隔离。 + +2. 
**漏洞所属程序及影响分析**: + - **程序类型**:这是 Linux 内核 (Kernel) 的漏洞,具体位于 cgroup v1 的参数解析器 (`cgroup1_parse_param`) 中。 + - **漏洞发生原因**:在处理 `fsconfig` 系统调用时,存在一个 use-after-free 漏洞,导致释放后的内存被再次访问。 + - **效果**: + - **特权提升**:本地攻击者可以通过利用此漏洞实现权限提升。 + - **容器逃逸**:由于 cgroup 是容器隔离机制的重要组成部分,此漏洞可能导致容器突破(container breakout),从而使攻击者从容器中逃逸到宿主机。 + - **拒绝服务**:攻击者可能通过触发该漏洞导致系统崩溃或不可用,从而造成拒绝服务 (DoS) 攻击。 + +### 结论: +该 CVE 与 cgroup 和容器隔离密切相关,是一个 Linux 内核中的漏洞,可能导致容器逃逸和系统权限提升等严重后果。 + +cve: ./data/2021/4xxx/CVE-2021-4197.json +An unprivileged write to the file handler flaw in the Linux kernel's control groups and namespaces subsystem was found in the way users have access to some less privileged process that are controlled by cgroups and have higher privileged parent process. It is actually both for cgroup2 and cgroup1 versions of control groups. A local user could use this flaw to crash the system or escalate their privileges on the system. + +analysis: 1. 该CVE信息与namespace、cgroup相关。 + +2. 这是Linux内核的漏洞。漏洞发生在内核的控制组(cgroups)和命名空间(namespaces)子系统中,由于用户可以访问某些由cgroups控制的低特权进程(这些进程有高特权父进程),导致未授权的写文件句柄操作成为可能。此漏洞存在于cgroup1和cgroup2两个版本的控制组实现中。攻击者可利用此漏洞使系统崩溃或在系统上提升权限。 + +3. 漏洞效果:可能导致系统崩溃或本地权限提升。 + +cve: ./data/2022/0xxx/CVE-2022-0070.json +Incomplete fix for CVE-2021-3100. The Apache Log4j hotpatch package starting with log4j-cve-2021-44228-hotpatch-1.1-16 will now explicitly mimic the Linux capabilities and cgroups of the target Java process that the hotpatch is applied to. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 该CVE描述中提到Linux capabilities和cgroups,因此与cgroups相关。 + +2. 
**程序漏洞分析**: + - 这是Apache Log4j热修复包的漏洞,涉及Log4j-cve-2021-44228-hotpatch工具。 + - 漏洞发生在热修复包试图模拟目标Java进程的Linux capabilities和cgroups时。 + - 效果:由于修复不完整(Incomplete fix),可能允许攻击者绕过预期的安全限制,导致潜在的权限提升或其他安全问题。 + +**结论**:与cgroups相关。这是容器内部运行的应用(Apache Log4j)的漏洞。 + +cve: ./data/2022/0xxx/CVE-2022-0185.json +A heap-based buffer overflow flaw was found in the way the legacy_parse_param function in the Filesystem Context functionality of the Linux kernel verified the supplied parameters length. An unprivileged (in case of unprivileged user namespaces enabled, otherwise needs namespaced CAP_SYS_ADMIN privilege) local user able to open a filesystem that does not support the Filesystem Context API (and thus fallbacks to legacy handling) could use this flaw to escalate their privileges on the system. + +analysis: 1. 该CVE信息与namespace相关,因为描述中提到“unprivileged user namespaces”和“namespaced CAP_SYS_ADMIN privilege”。 + +2. 这是Linux内核的漏洞。漏洞发生在`legacy_parse_param`函数中,该函数属于Linux内核的Filesystem Context功能模块。当处理文件系统参数长度时,由于存在堆基缓冲区溢出问题,可能导致以下效果: + - 如果启用了非特权用户命名空间(unprivileged user namespaces),任何本地非特权用户都可以利用此漏洞。 + - 如果未启用非特权用户命名空间,则攻击者需要具有命名空间内的`CAP_SYS_ADMIN`权限。 + - 攻击者可以通过打开一个不支持Filesystem Context API的文件系统(从而触发旧版处理逻辑)来利用此漏洞,最终实现权限提升。 + +cve: ./data/2022/0xxx/CVE-2022-0492.json +A vulnerability was found in the Linux kernel’s cgroup_release_agent_write in the kernel/cgroup/cgroup-v1.c function. This flaw, under certain circumstances, allows the use of the cgroups v1 release_agent feature to escalate privileges and bypass the namespace isolation unexpectedly. + +analysis: 1. 该CVE信息与namespace、cgroup、container以及隔离相关。 + +2. 这是Linux内核的漏洞。漏洞发生在`cgroup_release_agent_write`函数中,位于`kernel/cgroup/cgroup-v1.c`文件。由于cgroups v1的`release_agent`功能在特定情况下未正确处理权限检查或隔离机制,攻击者可以利用此漏洞绕过namespace隔离,进而实现权限提升。其效果是允许非特权用户突破容器或namespace的隔离限制,可能影响整个主机系统的安全性。 + +cve: ./data/2022/0xxx/CVE-2022-0532.json +An incorrect sysctls validation vulnerability was found in CRI-O 1.18 and earlier. 
The sysctls from the list of "safe" sysctls specified for the cluster will be applied to the host if an attacker is able to create a pod with a hostIPC and hostNetwork kernel namespace. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,此 CVE 与 namespace 和容器隔离相关。具体涉及 `hostIPC` 和 `hostNetwork` 内核命名空间。 + +2. **程序漏洞分析**: + - **程序**:这是 CRI-O 的漏洞,CRI-O 是一个容器运行时实现,通常用于 Kubernetes 环境中。 + - **漏洞发生原因**:CRI-O 在处理 sysctls 配置时存在验证不当的问题。攻击者可以通过创建一个具有 `hostIPC` 和 `hostNetwork` 命名空间的 Pod,使得本应仅在容器内生效的 "safe" sysctls 被应用到主机上。 + - **效果**:此漏洞可能导致主机系统的配置被意外修改,从而削弱容器与主机之间的隔离性,甚至可能进一步引发主机的安全风险或稳定性问题。 + +cve: ./data/2022/1xxx/CVE-2022-1055.json +A use-after-free exists in the Linux Kernel in tc_new_tfilter that could allow a local attacker to gain privilege escalation. The exploit requires unprivileged user namespaces. We recommend upgrading past commit 04c2a47ffb13c29778e2a14e414ad4cb5a5db4b5 + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 信息与 namespace 相关。描述中明确提到 "unprivileged user namespaces",这表明漏洞利用了用户命名空间的功能。 + +2. **程序的漏洞分析:** + - **程序类型:** 内核 (Linux Kernel) + - **漏洞发生位置:** 漏洞存在于 `tc_new_tfilter` 函数中,这是一个与流量控制 (Traffic Control) 相关的函数。 + - **漏洞原因:** 存在一个 use-after-free 的问题,即在释放内存后仍然尝试访问已释放的内存区域。 + - **影响后果:** 本地攻击者可以利用此漏洞实现权限提升 (privilege escalation)。由于漏洞需要使用非特权用户命名空间 (unprivileged user namespaces),因此它可能被用于突破容器隔离(例如在 Docker 或其他基于 Linux 容器的环境中)。 + +总结:CVE-2022-1055 是 Linux 内核中的一个 use-after-free 漏洞,涉及流量控制模块,并依赖用户命名空间进行利用,可能导致权限提升,对容器隔离构成威胁。 + +cve: ./data/2022/23xxx/CVE-2022-23774.json +Docker Desktop before 4.4.4 on Windows allows attackers to move arbitrary files. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 
这是Docker Desktop程序的漏洞。该漏洞发生在Windows平台上的Docker Desktop版本低于4.4.4时,攻击者可以利用此漏洞移动任意文件。此漏洞的效果是破坏系统的文件完整性,可能导致敏感文件被篡改或删除,进一步可能被利用来提升权限或执行其他恶意操作。 + +cve: ./data/2022/24xxx/CVE-2022-24122.json +kernel/ucount.c in the Linux kernel 5.14 through 5.16.4, when unprivileged user namespaces are enabled, allows a use-after-free and privilege escalation because a ucounts object can outlive its namespace. + +analysis: 1. 该 CVE 信息与 namespace 相关。 + +2. 这是 Linux 内核(Kernel)的漏洞。漏洞发生在 `kernel/ucount.c` 文件中,当启用非特权用户命名空间(unprivileged user namespaces)时,由于 `ucounts` 对象可以比其所属的命名空间存活时间更长,导致出现 use-after-free 问题。攻击者可以利用此漏洞进行权限提升(privilege escalation)。 + +cve: ./data/2022/24xxx/CVE-2022-24769.json +Moby is an open-source project created by Docker to enable and accelerate software containerization. A bug was found in Moby (Docker Engine) prior to version 20.10.14 where containers were incorrectly started with non-empty inheritable Linux process capabilities, creating an atypical Linux environment and enabling programs with inheritable file capabilities to elevate those capabilities to the permitted set during `execve(2)`. Normally, when executable programs have specified permitted file capabilities, otherwise unprivileged users and processes can execute those programs and gain the specified file capabilities up to the bounding set. Due to this bug, containers which included executable programs with inheritable file capabilities allowed otherwise unprivileged users and processes to additionally gain these inheritable file capabilities up to the container's bounding set. Containers which use Linux users and groups to perform privilege separation inside the container are most directly impacted. This bug did not affect the container security sandbox as the inheritable set never contained more capabilities than were included in the container's bounding set. This bug has been fixed in Moby (Docker Engine) 20.10.14. 
Running containers should be stopped, deleted, and recreated for the inheritable capabilities to be reset. This fix changes Moby (Docker Engine) behavior such that containers are started with a more typical Linux environment. As a workaround, the entry point of a container can be modified to use a utility like `capsh(1)` to drop inheritable capabilities prior to the primary process starting. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 信息与容器(container)相关,具体涉及 Moby(Docker Engine)在启动容器时的 Linux 进程能力(capabilities)处理问题。虽然没有直接提到 namespace 或 cgroup,但问题的核心在于容器环境中的进程能力和权限隔离机制。 + +2. **程序漏洞分析**: + - **漏洞所属程序**:这是 Moby(Docker Engine)的漏洞,而非内核或容器内部运行的应用。 + - **漏洞发生原因**:在 Moby(Docker Engine)版本 20.10.14 之前,容器启动时未正确初始化 inheritable Linux 进程能力集,导致容器中包含 inheritable 文件能力的可执行程序能够将这些能力提升到 permitted 集合中。正常情况下,这种行为不应该发生。 + - **漏洞效果**:此问题允许容器内的非特权用户或进程通过特定的文件能力提升其权限,从而破坏容器内的权限分离机制。尽管容器的安全沙箱本身未被突破(因为 inheritable 能力集始终受限于容器的 bounding set),但这仍然可能导致容器内部的权限滥用,特别是在使用 Linux 用户和组进行权限分离的场景下。 + +总结:该 CVE 与容器相关,涉及 Moby(Docker Engine)在容器启动时对进程能力的错误处理,可能导致容器内权限分离机制失效。 + +cve: ./data/2022/24xxx/CVE-2022-24778.json +The imgcrypt library provides API exensions for containerd to support encrypted container images and implements the ctd-decoder command line tool for use by containerd to decrypt encrypted container images. The imgcrypt function `CheckAuthorization` is supposed to check whether the current used is authorized to access an encrypted image and prevent the user from running an image that another user previously decrypted on the same system. In versions prior to 1.1.4, a failure occurs when an image with a ManifestList is used and the architecture of the local host is not the first one in the ManifestList. Only the first architecture in the list was tested, which may not have its layers available locally since it could not be run on the host architecture. 
Therefore, the verdict on unavailable layers was that the image could be run anticipating that image run failure would occur later due to the layers not being available. However, this verdict to allow the image to run enabled other architectures in the ManifestList to run an image without providing keys if that image had previously been decrypted. A patch has been applied to imgcrypt 1.1.4. Workarounds may include usage of different namespaces for each remote user. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与容器和隔离相关。具体涉及`imgcrypt`库对加密容器镜像的支持,以及`containerd`在解密镜像时的行为。此外,描述中提到可以使用不同的命名空间(namespaces)作为变通方案,进一步表明了其与隔离机制的相关性。 + +2. **漏洞所属程序及影响分析**: + - **程序**:此漏洞属于`imgcrypt`库,该库为`containerd`提供支持以处理加密容器镜像。 + - **漏洞发生原因**:在`imgcrypt`的`CheckAuthorization`函数中,当处理包含`ManifestList`的镜像时,仅检查了清单列表中的第一个架构是否可用。如果本地主机架构不是清单中的第一个架构,则可能会错误地允许运行镜像,即使该镜像的层(layers)不可用。 + - **漏洞效果**:这一行为导致其他架构的用户可以在没有提供正确密钥的情况下运行之前已被其他用户解密的镜像。这破坏了授权检查机制,可能允许未经授权的用户访问敏感的加密容器镜像内容,从而引发潜在的安全风险。 + - **隔离影响**:由于`containerd`是容器运行时的一部分,这种漏洞可能会影响不同用户之间的隔离性,尤其是在多租户环境中,可能导致跨用户的镜像访问权限泄露。 + +cve: ./data/2022/25xxx/CVE-2022-25365.json +Docker Desktop before 4.5.1 on Windows allows attackers to move arbitrary files. NOTE: this issue exists because of an incomplete fix for CVE-2022-23774. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,此CVE与容器相关。 + +2. **程序漏洞分析**: + - 这是 Docker Desktop 程序的漏洞。 + - 漏洞发生的原因是 Docker Desktop 在 Windows 上的一个修复不完整(针对 CVE-2022-23774 的修复未完全解决相关问题)。 + - 攻击者可以利用此漏洞移动任意文件,这可能破坏容器的隔离性,导致宿主机文件系统被篡改或敏感信息泄露。 + +cve: ./data/2022/26xxx/CVE-2022-26659.json +Docker Desktop installer on Windows in versions before 4.6.0 allows an attacker to overwrite any administrator writable files by creating a symlink in place of where the installer writes its log file. Starting from version 4.6.0, the Docker Desktop installer, when run elevated, will write its log files to a location not writable by non-administrator users. + +analysis: 1. 
**是否与namespace、cgroup、container或者容器、隔离相关:** + N/A + +2. **漏洞分析:** + - 漏洞所属程序:Docker Desktop 安装程序(Windows 平台)。 + - 漏洞发生原因:在 4.6.0 之前的版本中,Docker Desktop 安装程序在写入日志文件时,允许攻击者通过创建符号链接(symlink)来覆盖任何管理员可写的文件。 + - 漏洞效果:攻击者可以利用此漏洞覆盖系统中的关键文件,可能导致系统配置破坏、恶意代码注入或其他安全问题。 + +由于该漏洞与容器运行时的隔离机制(如 namespace、cgroup 等)无关,因此无需进一步分析其对容器隔离性的影响。 + +cve: ./data/2022/27xxx/CVE-2022-27649.json +A flaw was found in Podman, where containers were started incorrectly with non-empty default permissions. A vulnerability was found in Moby (Docker Engine), where containers were started incorrectly with non-empty inheritable Linux process capabilities. This flaw allows an attacker with access to programs with inheritable file capabilities to elevate those capabilities to the permitted set when execve(2) runs. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,这个CVE与容器相关。 + +2. **漏洞分析**: + - 这是容器实现程序(Podman 和 Moby/Docker Engine)的漏洞。 + - 漏洞发生的原因是容器在启动时,默认权限设置不正确,导致容器内的进程继承了过多的 Linux 进程能力(capabilities)。 + - 效果:攻击者可以利用此漏洞,在容器内通过 `execve(2)` 系统调用将继承的能力提升到允许的能力集(permitted set),从而实现权限提升。这种权限提升可能会破坏容器的隔离性,使攻击者能够对宿主机或其他容器造成潜在威胁。 + +cve: ./data/2022/27xxx/CVE-2022-27650.json +A flaw was found in crun where containers were incorrectly started with non-empty default permissions. A vulnerability was found in Moby (Docker Engine) where containers were started incorrectly with non-empty inheritable Linux process capabilities. This flaw allows an attacker with access to programs with inheritable file capabilities to elevate those capabilities to the permitted set when execve(2) runs. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 
这是容器运行时程序的漏洞,涉及两个组件: + - 漏洞影响了 `crun`(一个轻量级的 OCI 容器运行时)和 `Moby (Docker Engine)`。 + - 漏洞发生的原因是:在启动容器时,未正确设置默认权限,导致容器中的进程被赋予了非空的继承性 Linux 能力(inheritable capabilities)。 + - 效果:攻击者可以通过利用这些非空的继承性能力,在执行 `execve(2)` 系统调用时,将这些能力提升到允许的能力集(permitted capabilities),从而实现权限提升。这可能破坏容器的隔离性,使攻击者能够在容器内获得更高的权限,甚至可能影响宿主系统。 + +cve: ./data/2022/27xxx/CVE-2022-27651.json +A flaw was found in buildah where containers were incorrectly started with non-empty default permissions. A bug was found in Moby (Docker Engine) where containers were incorrectly started with non-empty inheritable Linux process capabilities, enabling an attacker with access to programs with inheritable file capabilities to elevate those capabilities to the permitted set when execve(2) runs. This has the potential to impact confidentiality and integrity. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是容器实现程序(Docker和Buildah)的漏洞。 + - 漏洞发生的原因:在Moby(Docker Engine)和Buildah中,容器被错误地以非空的继承性Linux进程能力启动。 + - 效果:攻击者如果能够访问具有继承文件能力的程序,可以通过`execve(2)`系统调用将这些能力提升到允许的能力集,从而导致权限提升。这可能影响系统的保密性和完整性。 + +cve: ./data/2022/27xxx/CVE-2022-27652.json +A flaw was found in cri-o, where containers were incorrectly started with non-empty default permissions. A vulnerability was found in Moby (Docker Engine) where containers started incorrectly with non-empty inheritable Linux process capabilities. This flaw allows an attacker with access to programs with inheritable file capabilities to elevate those capabilities to the permitted set when execve(2) runs. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是容器实现程序(cri-o 和 Moby/Docker Engine)的漏洞。 + - 漏洞发生的原因:容器在启动时被赋予了非空的默认权限,特别是非空的继承性 Linux 进程能力(inheritable capabilities)。 + - 效果:攻击者如果能够访问具有继承性文件能力的程序,可以通过 `execve(2)` 系统调用将这些能力提升到允许的能力集(permitted capabilities),从而导致权限提升。这可能使攻击者突破容器的隔离环境,获取更高的权限,甚至影响宿主机的安全。 + +cve: ./data/2022/29xxx/CVE-2022-29582.json +In the Linux kernel before 5.17.3, fs/io_uring.c has a use-after-free due to a race condition in io_uring timeouts. 
This can be triggered by a local user who has no access to any user namespace; however, the race condition perhaps can only be exploited infrequently. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,这个 CVE 与 namespace 相关。描述中提到该漏洞可以通过一个没有访问任何用户命名空间(user namespace)权限的本地用户触发。 + +2. **程序漏洞分析:** + - **程序:** 这是 Linux 内核(Kernel)的漏洞。 + - **漏洞发生位置:** 漏洞发生在 `fs/io_uring.c` 文件中,具体是由于 `io_uring` 超时处理中的竞争条件(race condition)导致了 use-after-free 问题。 + - **漏洞效果:** 本地用户即使没有对任何用户命名空间的访问权限,也可能利用此漏洞。虽然利用频率可能较低,但成功利用后可能导致任意代码执行、系统崩溃或其他未定义行为,从而破坏系统的稳定性或安全性。 + +总结:这是一个与用户命名空间相关的 Linux 内核漏洞,涉及 `io_uring` 的超时处理逻辑,可能导致 use-after-free 问题。 + +cve: ./data/2022/30xxx/CVE-2022-30137.json +Executive Summary +An Elevation of Privilege (EOP) vulnerability has been identified within Service Fabric clusters that run Docker containers. Exploitation of this EOP vulnerability requires an attacker to gain remote code execution within a container. All Service Fabric and Docker versions are impacted. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与容器(Docker containers)相关,并且涉及到容器内的隔离问题。 + +2. **程序漏洞分析**: + - **程序**:这是 Docker 容器在 Service Fabric 集群中的漏洞。 + - **漏洞发生方式**:攻击者需要首先获得容器内的远程代码执行权限,然后利用此漏洞提升权限(Elevation of Privilege, EOP)。 + - **效果**:成功利用此漏洞后,攻击者可以在宿主系统或其他容器中获得更高的权限,从而突破容器的隔离机制,可能进一步影响整个 Service Fabric 集群的安全性。 + +总结:该 CVE 与容器隔离相关,涉及 Docker 容器在 Service Fabric 中的权限提升问题。 + +cve: ./data/2022/31xxx/CVE-2022-31214.json +A Privilege Context Switching issue was discovered in join.c in Firejail 0.9.68. By crafting a bogus Firejail container that is accepted by the Firejail setuid-root program as a join target, a local attacker can enter an environment in which the Linux user namespace is still the initial user namespace, the NO_NEW_PRIVS prctl is not activated, and the entered mount namespace is under the attacker's control. In this way, the filesystem layout can be adjusted to gain root privileges through execution of available setuid-root binaries such as su or sudo. 
+ +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 namespace 和容器隔离相关。它涉及用户命名空间(user namespace)和挂载命名空间(mount namespace),并且 Firejail 是一个用于创建沙盒环境(类似于轻量级容器)的工具。 + +2. **漏洞所属程序及影响分析**: + - 这是 **Firejail** 的漏洞,而不是内核 Kernel 或 Docker 等其他容器实现。 + - 漏洞发生的原因是 Firejail 在处理 `join` 操作时,未能正确地限制权限上下文切换。具体来说: + - 攻击者可以构造一个伪造的 Firejail 容器环境。 + - 当使用 Firejail 的 setuid-root 程序作为目标加入此伪造容器时,进入的环境中仍然保留初始用户命名空间(initial user namespace),这意味着没有真正降权。 + - 同时,`NO_NEW_PRIVS` 控制未被激活,允许进一步提升权限。 + - 攻击者控制的挂载命名空间(mount namespace)使得文件系统布局可被调整,从而通过执行可用的 setuid-root 程序(如 `su` 或 `sudo`)获取 root 权限。 + - **效果**:本地攻击者可以通过此漏洞突破 Firejail 的隔离机制,最终获得系统的 root 权限。 + +cve: ./data/2022/31xxx/CVE-2022-31647.json +Docker Desktop before 4.6.0 on Windows allows attackers to delete any file through the hyperv/destroy dockerBackendV2 API via a symlink in the DataFolder parameter, a different vulnerability than CVE-2022-26659. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 该 CVE 与容器技术相关,因为它涉及 Docker Desktop 的 API 漏洞,并且可以通过 symlink(符号链接)攻击影响文件删除操作。 + +2. **程序漏洞分析**: + - **程序**:这是 Docker Desktop(容器实现)的漏洞,而不是内核或容器内部运行的应用。 + - **漏洞发生方式**:攻击者可以通过调用 `hyperv/destroy` API 并在 `DataFolder` 参数中构造一个符号链接,从而触发此漏洞。 + - **效果**:攻击者可以利用此漏洞删除目标系统上的任意文件,这可能破坏系统的完整性和可用性。 + +cve: ./data/2022/32xxx/CVE-2022-32250.json +net/netfilter/nf_tables_api.c in the Linux kernel through 5.18.1 allows a local user (able to create user/net namespaces) to escalate privileges to root because an incorrect NFT_STATEFUL_EXPR check leads to a use-after-free. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与namespace相关。描述中明确提到“user/net namespaces”,这表明问题涉及用户命名空间和网络命名空间,这两者是Linux容器隔离机制的重要组成部分。 + +2. 
**程序漏洞分析**: + - **程序**:这是Linux内核(Kernel)的漏洞。 + - **漏洞发生原因**:在`net/netfilter/nf_tables_api.c`模块中,由于`NFT_STATEFUL_EXPR`检查不正确,导致了use-after-free漏洞。 + - **效果**:攻击者可以通过创建用户命名空间和网络命名空间,利用此漏洞实现权限提升,从普通用户权限升级到root权限。这破坏了Linux系统的隔离机制,可能对容器环境的安全性造成严重影响。 + +cve: ./data/2022/32xxx/CVE-2022-32481.json +Dell PowerProtect Cyber Recovery, versions prior to 19.11, contain a privilege escalation vulnerability on virtual appliance deployments. A lower-privileged authenticated user can chain docker commands to escalate privileges to root leading to complete system takeover. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,该CVE与容器相关,因为描述中明确提到通过链式使用 Docker 命令进行提权。 + +2. **漏洞分析**: + - **程序类型**:这是 Dell PowerProtect Cyber Recovery 虚拟设备部署中的漏洞。 + - **漏洞原因**:较低权限的已认证用户可以通过组合(chain)Docker 命令来提升权限至 root。 + - **效果**:攻击者可以完全接管系统,获得最高控制权。 + +总结:该漏洞与容器(Docker)相关,利用了容器命令的不当权限管理,导致从低权限用户到 root 的提权,最终实现系统完全控制。 + +cve: ./data/2022/34xxx/CVE-2022-34292.json +Docker Desktop for Windows before 4.6.0 allows attackers to overwrite any file through a symlink attack on the hyperv/create dockerBackendV2 API by controlling the DataFolder parameter for DockerDesktop.vhdx, a similar issue to CVE-2022-31647. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是Docker Desktop for Windows的漏洞。 + - 漏洞发生的原因:攻击者可以通过hyperv/create dockerBackendV2 API控制用于存放DockerDesktop.vhdx的`DataFolder`参数,并在该位置实施符号链接(symlink)攻击,从而覆盖系统上的任意文件。 + - 效果:攻击者可以覆盖宿主机(Windows)上的任意文件,可能导致数据损坏、恶意代码注入或其他安全问题,从而破坏容器的隔离性或主机系统的安全性。 + +cve: ./data/2022/34xxx/CVE-2022-34918.json +An issue was discovered in the Linux kernel through 5.18.9. A type confusion bug in nft_set_elem_init (leading to a buffer overflow) could be used by a local attacker to escalate privileges, a different vulnerability than CVE-2022-32250. (The attacker can obtain root access, but must start with an unprivileged user namespace to obtain CAP_NET_ADMIN access.) This can be fixed in nft_setelem_parse_data in net/netfilter/nf_tables_api.c. + +analysis: 1. 
**是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,此 CVE 与 namespace 相关。攻击者需要从一个未特权的用户命名空间(unprivileged user namespace)开始,以获取 `CAP_NET_ADMIN` 权限,这表明漏洞利用依赖于用户命名空间的隔离特性。 + +2. **程序的漏洞及影响**: + - 这是 Linux 内核中的漏洞,具体位于 `nf_tables` 子系统中。 + - 漏洞类型是类型混淆(type confusion),发生在 `nft_set_elem_init` 函数中,可能导致缓冲区溢出。 + - 攻击者可以通过此漏洞在本地提升权限,最终获得 root 权限。 + - 漏洞发生的前提是攻击者已经具备了未特权用户命名空间中的某些能力(例如通过 `CAP_NET_ADMIN` 管理网络相关的功能)。 + +总结:这是一个 Linux 内核漏洞,与用户命名空间相关,攻击者可以利用它从未特权的用户命名空间提升到 root 权限。 + +cve: ./data/2022/36xxx/CVE-2022-36109.json +Moby is an open-source project created by Docker to enable software containerization. A bug was found in Moby (Docker Engine) where supplementary groups are not set up properly. If an attacker has direct access to a container and manipulates their supplementary group access, they may be able to use supplementary group access to bypass primary group restrictions in some cases, potentially gaining access to sensitive information or gaining the ability to execute code in that container. This bug is fixed in Moby (Docker Engine) 20.10.18. Running containers should be stopped and restarted for the permissions to be fixed. For users unable to upgrade, this problem can be worked around by not using the `"USER $USERNAME"` Dockerfile instruction. Instead by calling `ENTRYPOINT ["su", "-", "user"]` the supplementary groups will be set up properly. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与容器(container)相关。 + +2. 
**程序漏洞分析**: + - **漏洞所属程序**:这是 Moby(Docker Engine)的漏洞。Moby 是 Docker 的开源实现,用于支持容器化技术。 + - **漏洞发生原因**:在 Moby 中,容器的补充用户组(supplementary groups)未被正确设置。如果攻击者能够直接访问容器并操控其补充用户组的权限,可能会绕过主用户组(primary group)的限制。 + - **漏洞效果**:攻击者可能利用此漏洞访问敏感信息,或者获得在容器内执行代码的能力。这破坏了容器的安全隔离机制,可能导致数据泄露或未经授权的代码执行。 + +总结:该 CVE 与容器相关,是 Moby(Docker Engine)中的一个漏洞,源于补充用户组设置不当,可能导致容器内的安全隔离被破坏。 + +cve: ./data/2022/37xxx/CVE-2022-37326.json +Docker Desktop for Windows before 4.6.0 allows attackers to delete (or create) any file through the dockerBackendV2 windowscontainers/start API by controlling the pidfile field inside the DaemonJSON field in the WindowsContainerStartRequest class. This can indirectly lead to privilege escalation. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**:是的,此 CVE 与容器相关。 + +2. **程序漏洞分析**: + - 这是 Docker Desktop for Windows 的漏洞。 + - 漏洞发生的原因是 `dockerBackendV2 windowscontainers/start API` 对输入参数缺乏严格的验证,攻击者可以通过操控 `pidfile` 字段(位于 `DaemonJSON` 字段中)来删除或创建任意文件。 + - 效果:该漏洞可能导致特权升级,因为攻击者可以利用此漏洞修改关键系统文件或植入恶意文件,从而间接获得更高的权限。 + +总结:此 CVE 与容器相关,涉及 Docker Desktop for Windows 的 API 输入验证问题,可能导致特权升级。 + +cve: ./data/2022/38xxx/CVE-2022-38730.json +Docker Desktop for Windows before 4.6 allows attackers to overwrite any file through the windowscontainers/start dockerBackendV2 API by controlling the data-root field inside the DaemonJSON field in the WindowsContainerStartRequest class. This allows exploiting a symlink vulnerability in ..\dataRoot\network\files\local-kv.db because of a TOCTOU race condition. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,这个CVE与容器相关。 + +2. 
**漏洞分析**: + - 这是Docker Desktop for Windows的漏洞。 + - 漏洞发生的原因是:在`windowscontainers/start` API中,攻击者可以通过控制`DaemonJSON`字段中的`data-root`值来触发漏洞。由于在检查(check)与实际使用(use)之间存在竞争条件(TOCTOU,Time of Check to Time of Use),导致符号链接(symlink)处理不当。 + - 效果:攻击者可以利用此漏洞覆盖任意文件,这可能会导致系统文件被篡改,从而进一步破坏系统的完整性和安全性。 + +总结:这是一个Docker实现的漏洞,与容器技术相关,可能导致任意文件覆盖的风险。 + +cve: ./data/2022/39xxx/CVE-2022-39206.json +Onedev is an open source, self-hosted Git Server with CI/CD and Kanban. When using Docker-based job executors, the Docker socket (e.g. /var/run/docker.sock on Linux) is mounted into each Docker step. Users that can define and trigger CI/CD jobs on a project could use this to control the Docker daemon on the host machine. This is a known dangerous pattern, as it can be used to break out of Docker containers and, in most cases, gain root privileges on the host system. This issue allows regular (non-admin) users to potentially take over the build infrastructure of a OneDev instance. Attackers need to have an account (or be able to register one) and need permission to create a project. Since code.onedev.io has the right preconditions for this to be exploited by remote attackers, it could have been used to hijack builds of OneDev itself, e.g. by injecting malware into the docker images that are built and pushed to Docker Hub. The impact is increased by this as described before. Users are advised to upgrade to 7.3.0 or higher. There are no known workarounds for this issue. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,此 CVE 信息与容器和隔离相关。 + +2. 
**漏洞分析**: + - **程序**:这是 OneDev 的漏洞,OneDev 是一个包含 CI/CD 功能的 Git 服务器。 + - **漏洞发生原因**:当使用 Docker-based job executors 时,Docker 套接字(如 `/var/run/docker.sock`)会被挂载到每个 Docker 容器中。如果用户能够定义并触发 CI/CD 作业,则可以利用该套接字控制主机上的 Docker 守护进程。 + - **效果**:这种模式允许普通用户(非管理员)突破容器隔离,获取主机系统的 root 权限,从而可能接管 OneDev 实例的构建基础设施。攻击者可以通过注入恶意代码到构建的 Docker 镜像中,并将其推送到 Docker Hub,进一步扩大影响范围。 + +总结:此漏洞与容器隔离机制相关,攻击者可通过访问挂载的 Docker 套接字突破容器隔离,获取主机权限。 + +cve: ./data/2022/41xxx/CVE-2022-41737.json +IBM Storage Scale Container Native Storage Access 5.1.2.1 through 5.1.7.0 could allow a local attacker to initiate connections from a container outside the current namespace. IBM X-Force ID: 237811. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,此 CVE 信息与 namespace、container 和隔离相关。 + +2. **程序漏洞分析**: + - 这是 **IBM Storage Scale Container Native Storage Access** 程序的漏洞。 + - 漏洞发生的原因是该程序未能正确限制容器的网络行为,导致本地攻击者可以从一个容器发起连接,并突破当前 namespace 的限制。 + - 效果:攻击者可以利用此漏洞从容器中发起连接,访问或影响其他本应被隔离的资源,从而破坏容器隔离性。 + +cve: ./data/2022/43xxx/CVE-2022-43679.json +The Docker image of ownCloud Server through 10.11 contains a misconfiguration that renders the trusted_domains config useless. This could be abused to spoof the URL in password-reset e-mail messages. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是容器实现Docker中的漏洞。具体来说,ownCloud Server的Docker镜像存在配置错误,导致`trusted_domains`配置失效。这种漏洞的发生是因为镜像内的配置文件未正确设置或验证,使得攻击者可以滥用此问题伪造密码重置邮件中的URL。其效果是允许攻击者通过欺骗用户点击伪造的URL来执行钓鱼攻击或其他恶意行为。 + +cve: ./data/2022/46xxx/CVE-2022-46167.json +Capsule is a multi-tenancy and policy-based framework for Kubernetes. Prior to version 0.1.3, a ServiceAccount deployed in a Tenant Namespace, when granted with `PATCH` capabilities on its own Namespace, is able to edit it and remove the Owner Reference, breaking the reconciliation of the Capsule Operator and removing all the enforcement like Pod Security annotations, Network Policies, Limit Range and Resource Quota items. 
An attacker could detach the Namespace from a Tenant that is forbidding starting privileged Pods using the Pod Security labels by removing the OwnerReference, removing the enforcement labels, and being able to start privileged containers that would be able to start a generic Kubernetes privilege escalation. Patches have been released for version 0.1.3. No known workarounds are available. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与 namespace 和容器隔离相关。具体来说,问题涉及 Kubernetes 中的 Namespace 和 Tenant Namespace 的管理,以及通过移除 Owner Reference 来破坏隔离机制。 + +2. **程序漏洞分析:** + - **程序:** 这是 Capsule(一个基于 Kubernetes 的多租户和策略框架)的漏洞。 + - **漏洞发生原因:** 在 Capsule 版本 0.1.3 之前,如果在 Tenant Namespace 中部署了一个 ServiceAccount,并且该 ServiceAccount 被授予了对其自身 Namespace 的 `PATCH` 权限,则可以编辑 Namespace 并移除 Owner Reference。这一操作会破坏 Capsule Operator 的协调机制,从而移除诸如 Pod Security 注解、网络策略、资源配额等强制性限制。 + - **漏洞效果:** 攻击者可以通过移除 OwnerReference 和强制性标签,绕过 Tenant 的限制(例如禁止启动特权容器的 Pod Security 策略)。这将允许攻击者启动特权容器,从而可能引发 Kubernetes 集群中的权限升级问题。这种行为直接破坏了 Kubernetes 的隔离机制,可能导致严重的安全后果。 + +cve: ./data/2022/47xxx/CVE-2022-47952.json +lxc-user-nic in lxc through 5.0.1 is installed setuid root, and may allow local users to infer whether any file exists, even within a protected directory tree, because "Failed to open" often indicates that a file does not exist, whereas "does not refer to a network namespace path" often indicates that a file exists. NOTE: this is different from CVE-2018-6556 because the CVE-2018-6556 fix design was based on the premise that "we will report back to the user that the open() failed but the user has no way of knowing why it failed"; however, in many realistic cases, there are no plausible reasons for failing except that the file does not exist. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与容器和隔离相关。LXC(Linux Containers)是一个用于创建和管理 Linux 容器的工具,而 `lxc-user-nic` 是 LXC 的一个组件,涉及网络命名空间(network namespace)的操作。因此,这个漏洞与容器隔离机制中的命名空间功能密切相关。 + +2. 
**程序漏洞分析**: + - **程序**:这是 LXC 的漏洞,LXC 是一种容器实现技术,而不是内核或容器内部运行的应用。 + - **漏洞发生原因**:`lxc-user-nic` 组件被设置为 setuid root,这意味着普通用户可以以 root 权限执行该程序。由于该程序在处理文件路径时返回了不同的错误消息(如 "Failed to open" 和 "does not refer to a network namespace path"),攻击者可以通过这些错误消息推断出受保护目录树中文件的存在性。 + - **效果**:此漏洞允许本地用户绕过文件系统的访问控制,推测受保护目录中文件的存在性。这可能会泄露系统的信息,并进一步帮助攻击者进行其他攻击。虽然它不直接破坏隔离,但削弱了容器环境的安全性,可能为更严重的攻击提供线索。 + +cve: ./data/2022/48xxx/CVE-2022-48638.json +In the Linux kernel, the following vulnerability has been resolved: + +cgroup: cgroup_get_from_id() must check the looked-up kn is a directory + +cgroup has to be one kernfs dir, otherwise kernel panic is caused, +especially cgroup id is provide from userspace. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是Linux内核的漏洞。漏洞发生在`cgroup_get_from_id()`函数中,当从用户空间提供的cgroup ID查找对应的kernfs节点时,未正确检查该节点是否为目录。如果节点不是目录,可能会导致内核崩溃(kernel panic)。攻击者可以通过提供恶意的cgroup ID来触发此漏洞,从而破坏系统的稳定性。 + +cve: ./data/2022/48xxx/CVE-2022-48659.json +In the Linux kernel, the following vulnerability has been resolved: + +mm/slub: fix to return errno if kmalloc() fails + +In create_unique_id(), kmalloc(, GFP_KERNEL) can fail due to +out-of-memory, if it fails, return errno correctly rather than +triggering panic via BUG_ON(); + +kernel BUG at mm/slub.c:5893! 
+Internal error: Oops - BUG: 0 [#1] PREEMPT SMP + +Call trace: + sysfs_slab_add+0x258/0x260 mm/slub.c:5973 + __kmem_cache_create+0x60/0x118 mm/slub.c:4899 + create_cache mm/slab_common.c:229 [inline] + kmem_cache_create_usercopy+0x19c/0x31c mm/slab_common.c:335 + kmem_cache_create+0x1c/0x28 mm/slab_common.c:390 + f2fs_kmem_cache_create fs/f2fs/f2fs.h:2766 [inline] + f2fs_init_xattr_caches+0x78/0xb4 fs/f2fs/xattr.c:808 + f2fs_fill_super+0x1050/0x1e0c fs/f2fs/super.c:4149 + mount_bdev+0x1b8/0x210 fs/super.c:1400 + f2fs_mount+0x44/0x58 fs/f2fs/super.c:4512 + legacy_get_tree+0x30/0x74 fs/fs_context.c:610 + vfs_get_tree+0x40/0x140 fs/super.c:1530 + do_new_mount+0x1dc/0x4e4 fs/namespace.c:3040 + path_mount+0x358/0x914 fs/namespace.c:3370 + do_mount fs/namespace.c:3383 [inline] + __do_sys_mount fs/namespace.c:3591 [inline] + __se_sys_mount fs/namespace.c:3568 [inline] + __arm64_sys_mount+0x2f8/0x408 fs/namespace.c:3568 + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + N/A + +2. **程序漏洞分析:** + 这是 Linux 内核中的一个漏洞,具体发生在内存管理子系统 SLUB(the unqueued slab allocator,即无队列 slab 分配器)中。漏洞的原因是 `kmalloc()` 在内存不足的情况下返回失败时,代码没有正确处理错误,而是触发了内核 BUG(`BUG_ON()`),导致系统崩溃(panic)。此问题在创建唯一 ID 的函数 `create_unique_id()` 中被发现。 + + - **涉及的程序:** Linux 内核 + - **漏洞发生原因:** 当系统内存不足时,`kmalloc()` 可能会失败,但代码未正确检查返回值,直接调用了 `BUG_ON()`,导致内核崩溃。 + - **效果:** 如果系统在低内存情况下尝试分配内存,可能会触发内核崩溃,影响系统的稳定性。 + +3. **结论:** + N/A + +cve: ./data/2022/48xxx/CVE-2022-48671.json +In the Linux kernel, the following vulnerability has been resolved: + +cgroup: Add missing cpus_read_lock() to cgroup_attach_task_all() + +syzbot is hitting percpu_rwsem_assert_held(&cpu_hotplug_lock) warning at +cpuset_attach() [1], for commit 4f7e7236435ca0ab ("cgroup: Fix +threadgroup_rwsem <-> cpus_read_lock() deadlock") missed that +cpuset_attach() is also called from cgroup_attach_task_all(). +Add cpus_read_lock() like what cgroup_procs_write_start() does. + +analysis: 1. 
**是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 cgroup 直接相关。 + +2. **程序漏洞分析**: + - 这是 **Linux 内核 (Kernel)** 的漏洞。 + - 漏洞发生在 cgroup(控制组)的功能实现中,具体是在 `cgroup_attach_task_all()` 函数中缺少了对 `cpus_read_lock()` 的调用。 + - 漏洞效果:由于缺少 `cpus_read_lock()`,可能导致竞争条件或死锁问题。具体表现为,在 CPU 热插拔锁定 (`cpu_hotplug_lock`) 的场景下,`percpu_rwsem_assert_held()` 触发警告,进而可能影响系统的稳定性和可靠性。 + +总结:这是一个 Linux 内核中 cgroup 相关的漏洞,可能导致系统在特定操作下的死锁或不稳定问题。 + +cve: ./data/2022/48xxx/CVE-2022-48757.json +In the Linux kernel, the following vulnerability has been resolved: + +net: fix information leakage in /proc/net/ptype + +In one net namespace, after creating a packet socket without binding +it to a device, users in other net namespaces can observe the new +`packet_type` added by this packet socket by reading `/proc/net/ptype` +file. This is minor information leakage as packet socket is +namespace aware. + +Add a net pointer in `packet_type` to keep the net namespace of +of corresponding packet socket. In `ptype_seq_show`, this net pointer +must be checked when it is not NULL. + +analysis: 1. 该CVE信息与namespace相关。 + +2. 这是Linux内核的漏洞。该漏洞发生在处理网络命名空间(net namespace)时,具体是在创建一个未绑定到设备的packet socket后,其他网络命名空间的用户可以通过读取`/proc/net/ptype`文件观察到新增的`packet_type`。这种信息泄露虽然轻微,但违背了packet socket的命名空间隔离性。 + +效果:此漏洞导致跨网络命名空间的信息泄露,攻击者可能利用此漏洞获取不应该访问的packet socket类型信息。 + +cve: ./data/2022/48xxx/CVE-2022-48759.json +In the Linux kernel, the following vulnerability has been resolved: + +rpmsg: char: Fix race between the release of rpmsg_ctrldev and cdev + +struct rpmsg_ctrldev contains a struct cdev. The current code frees +the rpmsg_ctrldev struct in rpmsg_ctrldev_release_device(), but the +cdev is a managed object, therefore its release is not predictable +and the rpmsg_ctrldev could be freed before the cdev is entirely +released, as in the backtrace below. 
+ +[ 93.625603] ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x7c +[ 93.636115] WARNING: CPU: 0 PID: 12 at lib/debugobjects.c:488 debug_print_object+0x13c/0x1b0 +[ 93.644799] Modules linked in: veth xt_cgroup xt_MASQUERADE rfcomm algif_hash algif_skcipher af_alg uinput ip6table_nat fuse uvcvideo videobuf2_vmalloc venus_enc venus_dec videobuf2_dma_contig hci_uart btandroid btqca snd_soc_rt5682_i2c bluetooth qcom_spmi_temp_alarm snd_soc_rt5682v +[ 93.715175] CPU: 0 PID: 12 Comm: kworker/0:1 Tainted: G B 5.4.163-lockdep #26 +[ 93.723855] Hardware name: Google Lazor (rev3 - 8) with LTE (DT) +[ 93.730055] Workqueue: events kobject_delayed_cleanup +[ 93.735271] pstate: 60c00009 (nZCv daif +PAN +UAO) +[ 93.740216] pc : debug_print_object+0x13c/0x1b0 +[ 93.744890] lr : debug_print_object+0x13c/0x1b0 +[ 93.749555] sp : ffffffacf5bc7940 +[ 93.752978] x29: ffffffacf5bc7940 x28: dfffffd000000000 +[ 93.758448] x27: ffffffacdb11a800 x26: dfffffd000000000 +[ 93.763916] x25: ffffffd0734f856c x24: dfffffd000000000 +[ 93.769389] x23: 0000000000000000 x22: ffffffd0733c35b0 +[ 93.774860] x21: ffffffd0751994a0 x20: ffffffd075ec27c0 +[ 93.780338] x19: ffffffd075199100 x18: 00000000000276e0 +[ 93.785814] x17: 0000000000000000 x16: dfffffd000000000 +[ 93.791291] x15: ffffffffffffffff x14: 6e6968207473696c +[ 93.796768] x13: 0000000000000000 x12: ffffffd075e2b000 +[ 93.802244] x11: 0000000000000001 x10: 0000000000000000 +[ 93.807723] x9 : d13400dff1921900 x8 : d13400dff1921900 +[ 93.813200] x7 : 0000000000000000 x6 : 0000000000000000 +[ 93.818676] x5 : 0000000000000080 x4 : 0000000000000000 +[ 93.824152] x3 : ffffffd0732a0fa4 x2 : 0000000000000001 +[ 93.829628] x1 : ffffffacf5bc7580 x0 : 0000000000000061 +[ 93.835104] Call trace: +[ 93.837644] debug_print_object+0x13c/0x1b0 +[ 93.841963] __debug_check_no_obj_freed+0x25c/0x3c0 +[ 93.846987] debug_check_no_obj_freed+0x18/0x20 +[ 93.851669] slab_free_freelist_hook+0xbc/0x1e4 +[ 93.856346] 
kfree+0xfc/0x2f4 +[ 93.859416] rpmsg_ctrldev_release_device+0x78/0xb8 +[ 93.864445] device_release+0x84/0x168 +[ 93.868310] kobject_cleanup+0x12c/0x298 +[ 93.872356] kobject_delayed_cleanup+0x10/0x18 +[ 93.876948] process_one_work+0x578/0x92c +[ 93.881086] worker_thread+0x804/0xcf8 +[ 93.884963] kthread+0x2a8/0x314 +[ 93.888303] ret_from_fork+0x10/0x18 + +The cdev_device_add/del() API was created to address this issue (see +commit '233ed09d7fda ("chardev: add helper function to register char +devs with a struct device")'), use it instead of cdev add/del(). + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离无关。 + +2. 这是Linux内核的漏洞。该漏洞发生在rpmsg字符设备的实现中,具体是因为`rpmsg_ctrldev_release_device()`函数释放了`rpmsg_ctrldev`结构体,但其中包含的`cdev`(字符设备)是一个受管理的对象,其释放时间不可预测,可能导致`rpmsg_ctrldev`在`cdev`完全释放之前被释放,从而引发竞争条件和潜在的内存损坏问题。 + +3. 该漏洞的效果可能包括系统崩溃或内核错误(如提供的回溯日志所示),但没有直接提到与容器或隔离机制相关的行为。 + +**结论:N/A** + +cve: ./data/2022/48xxx/CVE-2022-48799.json +In the Linux kernel, the following vulnerability has been resolved: + +perf: Fix list corruption in perf_cgroup_switch() + +There's list corruption on cgrp_cpuctx_list. This happens on the +following path: + + perf_cgroup_switch: list_for_each_entry(cgrp_cpuctx_list) + cpu_ctx_sched_in + ctx_sched_in + ctx_pinned_sched_in + merge_sched_in + perf_cgroup_event_disable: remove the event from the list + +Use list_for_each_entry_safe() to allow removing an entry during +iteration. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是Linux内核的漏洞。该漏洞发生在性能监控工具perf与cgroup交互的过程中,具体是在`perf_cgroup_switch()`函数中,由于在遍历`cgrp_cpuctx_list`链表时未正确处理链表项的移除操作,导致链表损坏(list corruption)。此问题可能影响cgroup相关的资源控制和性能事件监控功能,进而可能导致系统不稳定或潜在的资源隔离失效。攻击者可能利用此漏洞破坏cgroup的正常运作,从而影响依赖cgroup实现隔离的容器环境。 + +cve: ./data/2022/48xxx/CVE-2022-48810.json +In the Linux kernel, the following vulnerability has been resolved: + +ipmr,ip6mr: acquire RTNL before calling ip[6]mr_free_table() on failure path + +ip[6]mr_free_table() can only be called under RTNL lock. 
+ +RTNL: assertion failed at net/core/dev.c (10367) +WARNING: CPU: 1 PID: 5890 at net/core/dev.c:10367 unregister_netdevice_many+0x1246/0x1850 net/core/dev.c:10367 +Modules linked in: +CPU: 1 PID: 5890 Comm: syz-executor.2 Not tainted 5.16.0-syzkaller-11627-g422ee58dc0ef #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:unregister_netdevice_many+0x1246/0x1850 net/core/dev.c:10367 +Code: 0f 85 9b ee ff ff e8 69 07 4b fa ba 7f 28 00 00 48 c7 c6 00 90 ae 8a 48 c7 c7 40 90 ae 8a c6 05 6d b1 51 06 01 e8 8c 90 d8 01 <0f> 0b e9 70 ee ff ff e8 3e 07 4b fa 4c 89 e7 e8 86 2a 59 fa e9 ee +RSP: 0018:ffffc900046ff6e0 EFLAGS: 00010286 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 +RDX: ffff888050f51d00 RSI: ffffffff815fa008 RDI: fffff520008dfece +RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 +R10: ffffffff815f3d6e R11: 0000000000000000 R12: 00000000fffffff4 +R13: dffffc0000000000 R14: ffffc900046ff750 R15: ffff88807b7dc000 +FS: 00007f4ab736e700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fee0b4f8990 CR3: 000000001e7d2000 CR4: 00000000003506e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + mroute_clean_tables+0x244/0xb40 net/ipv6/ip6mr.c:1509 + ip6mr_free_table net/ipv6/ip6mr.c:389 [inline] + ip6mr_rules_init net/ipv6/ip6mr.c:246 [inline] + ip6mr_net_init net/ipv6/ip6mr.c:1306 [inline] + ip6mr_net_init+0x3f0/0x4e0 net/ipv6/ip6mr.c:1298 + ops_init+0xaf/0x470 net/core/net_namespace.c:140 + setup_net+0x54f/0xbb0 net/core/net_namespace.c:331 + copy_net_ns+0x318/0x760 net/core/net_namespace.c:475 + create_new_namespaces+0x3f6/0xb20 kernel/nsproxy.c:110 + copy_namespaces+0x391/0x450 kernel/nsproxy.c:178 + copy_process+0x2e0c/0x7300 kernel/fork.c:2167 + kernel_clone+0xe7/0xab0 kernel/fork.c:2555 + 
__do_sys_clone+0xc8/0x110 kernel/fork.c:2672 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae +RIP: 0033:0x7f4ab89f9059 +Code: Unable to access opcode bytes at RIP 0x7f4ab89f902f. +RSP: 002b:00007f4ab736e118 EFLAGS: 00000206 ORIG_RAX: 0000000000000038 +RAX: ffffffffffffffda RBX: 00007f4ab8b0bf60 RCX: 00007f4ab89f9059 +RDX: 0000000020000280 RSI: 0000000020000270 RDI: 0000000040200000 +RBP: 00007f4ab8a5308d R08: 0000000020000300 R09: 0000000020000300 +R10: 00000000200002c0 R11: 0000000000000206 R12: 0000000000000000 +R13: 00007ffc3977cc1f R14: 00007f4ab736e300 R15: 0000000000022000 + + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,这个 CVE 与 namespace 和隔离机制相关。具体来说,问题出现在创建新的网络命名空间 (`create_new_namespaces`) 的过程中,涉及 `copy_net_ns` 和 `setup_net` 函数的调用链。 + +2. **这是什么程序的漏洞:** + 这是 Linux 内核 (Kernel) 的漏洞。漏洞发生在网络多播路由 (`ipmr` 和 `ip6mr`) 模块中,当在失败路径上释放路由表时未正确获取 RTNL 锁,导致断言失败和潜在的系统崩溃。 + +3. **漏洞如何发生及效果:** + - **发生原因:** 在处理网络命名空间初始化的过程中,`ip6mr_free_table()` 函数被调用以释放路由表,但该函数要求必须在持有 RTNL 锁的情况下执行。如果锁未正确获取,就会触发断言失败 (`RTNL: assertion failed`)。 + - **效果:** 此漏洞可能导致内核恐慌 (kernel panic),从而中断系统的正常运行。这尤其会影响使用网络命名空间的场景,例如容器环境中的网络隔离功能。攻击者可能通过恶意操作触发此漏洞,导致宿主机或容器不可用。 + +cve: ./data/2022/48xxx/CVE-2022-48944.json +In the Linux kernel, the following vulnerability has been resolved: + +sched: Fix yet more sched_fork() races + +Where commit 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an +invalid sched_task_group") fixed a fork race vs cgroup, it opened up a +race vs syscalls by not placing the task on the runqueue before it +gets exposed through the pidhash. + +Commit 13765de8148f ("sched/fair: Fix fault in reweight_entity") is +trying to fix a single instance of this, instead fix the whole class +of issues, effectively reverting this commit. + +analysis: 1. 这个CVE信息与cgroup相关,因为它提到了在`sched_fork()`中修复了与cgroup相关的竞争条件问题。 + +2. 
这是Linux内核的漏洞。 + - 漏洞发生在调度器(scheduler)的`sched_fork()`函数中,当创建新任务时,存在与cgroup相关的竞争条件。具体来说,任务在被添加到运行队列之前,就已经通过pidhash暴露出来了,这可能导致不一致的状态或访问无效的`sched_task_group`。 + - 效果:此漏洞可能允许攻击者利用竞争条件来破坏任务组的隔离性,从而影响cgroup的功能,甚至可能导致系统不稳定或资源管理错误。 + +cve: ./data/2022/48xxx/CVE-2022-48988.json +In the Linux kernel, the following vulnerability has been resolved: + +memcg: fix possible use-after-free in memcg_write_event_control() + +memcg_write_event_control() accesses the dentry->d_name of the specified +control fd to route the write call. As a cgroup interface file can't be +renamed, it's safe to access d_name as long as the specified file is a +regular cgroup file. Also, as these cgroup interface files can't be +removed before the directory, it's safe to access the parent too. + +Prior to 347c4a874710 ("memcg: remove cgroup_event->cft"), there was a +call to __file_cft() which verified that the specified file is a regular +cgroupfs file before further accesses. The cftype pointer returned from +__file_cft() was no longer necessary and the commit inadvertently dropped +the file type check with it allowing any file to slip through. With the +invariants broken, the d_name and parent accesses can now race against +renames and removals of arbitrary files and cause use-after-frees. + +Fix the bug by resurrecting the file type check in __file_cft(). Now that +cgroupfs is implemented through kernfs, checking the file operations needs +to go through a layer of indirection. Instead, let's check the superblock +and dentry type. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关** + 是的,这个 CVE 与 cgroup 直接相关。漏洞涉及内存控制组(memcg)中的 `memcg_write_event_control()` 函数,该函数处理 cgroup 接口文件的操作。cgroup 是 Linux 内核中用于实现资源限制和隔离的重要机制,广泛应用于容器技术(如 Docker 和 Kubernetes)中。 + +2. 
**这是什么程序的漏洞?如何发生?有何效果?** + - **程序**: 这是 Linux 内核 (Kernel) 的漏洞,具体发生在内存控制组 (memcg) 的实现中。 + - **漏洞发生机制**: + 在此前的提交 `347c4a874710` ("memcg: remove cgroup_event->cft") 中,内核开发者在移除 `__file_cft()` 调用时无意中一并去掉了对文件类型的检查,这导致任意文件可以通过后续的访问逻辑。由于 cgroup 接口文件的不变性假设被破坏,`d_name` 和父目录的访问可能与其他操作(如重命名或删除)竞争,从而引发 use-after-free 漏洞。 + - **漏洞效果**: + 攻击者可能利用此漏洞触发 use-after-free,进而导致系统崩溃(拒绝服务攻击)或潜在的权限提升。在容器环境中,这种漏洞可能会被用来破坏 cgroup 的隔离性,影响宿主系统的稳定性或安全性。 + +总结:这是一个与 cgroup 相关的 Linux 内核漏洞,可能导致 use-after-free,影响系统稳定性和容器隔离性。 + +cve: ./data/2022/49xxx/CVE-2022-49003.json +In the Linux kernel, the following vulnerability has been resolved: + +nvme: fix SRCU protection of nvme_ns_head list + +Walking the nvme_ns_head siblings list is protected by the head's srcu +in nvme_ns_head_submit_bio() but not nvme_mpath_revalidate_paths(). +Removing namespaces from the list also fails to synchronize the srcu. +Concurrent scan work can therefore cause use-after-frees. + +Hold the head's srcu lock in nvme_mpath_revalidate_paths() and +synchronize with the srcu, not the global RCU, in nvme_ns_remove(). + +Observed the following panic when making NVMe/RDMA connections +with native multipath on the Rocky Linux 8.6 kernel +(it seems the upstream kernel has the same race condition). +Disassembly shows the faulting instruction is cmp 0x50(%rdx),%rcx; +computing capacity != get_capacity(ns->disk). +Address 0x50 is dereferenced because ns->disk is NULL. +The NULL disk appears to be the result of concurrent scan work +freeing the namespace (note the log line in the middle of the panic). + +[37314.206036] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 +[37314.206036] nvme0n3: detected capacity change from 0 to 11811160064 +[37314.299753] PGD 0 P4D 0 +[37314.299756] Oops: 0000 [#1] SMP PTI +[37314.299759] CPU: 29 PID: 322046 Comm: kworker/u98:3 Kdump: loaded Tainted: G W X --------- - - 4.18.0-372.32.1.el8test86.x86_64 #1 +[37314.299762] Hardware name: Dell Inc. 
PowerEdge R720/0JP31P, BIOS 2.7.0 05/23/2018 +[37314.299763] Workqueue: nvme-wq nvme_scan_work [nvme_core] +[37314.299783] RIP: 0010:nvme_mpath_revalidate_paths+0x26/0xb0 [nvme_core] +[37314.299790] Code: 1f 44 00 00 66 66 66 66 90 55 53 48 8b 5f 50 48 8b 83 c8 c9 00 00 48 8b 13 48 8b 48 50 48 39 d3 74 20 48 8d 42 d0 48 8b 50 20 <48> 3b 4a 50 74 05 f0 80 60 70 ef 48 8b 50 30 48 8d 42 d0 48 39 d3 +[37315.058803] RSP: 0018:ffffabe28f913d10 EFLAGS: 00010202 +[37315.121316] RAX: ffff927a077da800 RBX: ffff92991dd70000 RCX: 0000000001600000 +[37315.206704] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff92991b719800 +[37315.292106] RBP: ffff929a6b70c000 R08: 000000010234cd4a R09: c0000000ffff7fff +[37315.377501] R10: 0000000000000001 R11: ffffabe28f913a30 R12: 0000000000000000 +[37315.462889] R13: ffff92992716600c R14: ffff929964e6e030 R15: ffff92991dd70000 +[37315.548286] FS: 0000000000000000(0000) GS:ffff92b87fb80000(0000) knlGS:0000000000000000 +[37315.645111] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[37315.713871] CR2: 0000000000000050 CR3: 0000002208810006 CR4: 00000000000606e0 +[37315.799267] Call Trace: +[37315.828515] nvme_update_ns_info+0x1ac/0x250 [nvme_core] +[37315.892075] nvme_validate_or_alloc_ns+0x2ff/0xa00 [nvme_core] +[37315.961871] ? __blk_mq_free_request+0x6b/0x90 +[37316.015021] nvme_scan_work+0x151/0x240 [nvme_core] +[37316.073371] process_one_work+0x1a7/0x360 +[37316.121318] ? create_worker+0x1a0/0x1a0 +[37316.168227] worker_thread+0x30/0x390 +[37316.212024] ? create_worker+0x1a0/0x1a0 +[37316.258939] kthread+0x10a/0x120 +[37316.297557] ? 
set_kthread_struct+0x50/0x50 +[37316.347590] ret_from_fork+0x35/0x40 +[37316.390360] Modules linked in: nvme_rdma nvme_tcp(X) nvme_fabrics nvme_core netconsole iscsi_tcp libiscsi_tcp dm_queue_length dm_service_time nf_conntrack_netlink br_netfilter bridge stp llc overlay nft_chain_nat ipt_MASQUERADE nf_nat xt_addrtype xt_CT nft_counter xt_state xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_comment xt_multiport nft_compat nf_tables libcrc32c nfnetlink dm_multipath tg3 rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm intel_rapl_msr iTCO_wdt iTCO_vendor_support dcdbas intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipmi_ssif kvm irqbypass crct10dif_pclmul crc32_pclmul mlx5_ib ghash_clmulni_intel ib_uverbs rapl intel_cstate intel_uncore ib_core ipmi_si joydev mei_me pcspkr ipmi_devintf mei lpc_ich wmi ipmi_msghandler acpi_power_meter ex +---truncated--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. **漏洞分析**: + - **程序类型**:这是 Linux 内核 (Kernel) 的漏洞,具体涉及 NVMe 子系统。 + - **漏洞发生原因**:在 `nvme_ns_head_submit_bio()` 中访问 `nvme_ns_head` 兄弟列表时使用了 SRCU(Sleepable Read-Copy Update)保护,但在 `nvme_mpath_revalidate_paths()` 中未正确同步 SRCU。此外,在从列表中移除命名空间时也未同步 SRCU,这可能导致并发扫描工作引发 use-after-free 问题。 + - **漏洞效果**:当使用 NVMe/RDMA 连接并启用本地多路径功能时,可能会触发内核崩溃(kernel panic)。日志显示了一个空指针解引用错误 (`BUG: unable to handle kernel NULL pointer dereference`),原因是 `ns->disk` 被释放后仍被访问。 + +3. 
**结论**: + N/A + +cve: ./data/2022/49xxx/CVE-2022-49087.json +In the Linux kernel, the following vulnerability has been resolved: + +rxrpc: fix a race in rxrpc_exit_net() + +Current code can lead to the following race: + +CPU0 CPU1 + +rxrpc_exit_net() + rxrpc_peer_keepalive_worker() + if (rxnet->live) + + rxnet->live = false; + del_timer_sync(&rxnet->peer_keepalive_timer); + + timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); + + cancel_work_sync(&rxnet->peer_keepalive_work); + +rxrpc_exit_net() exits while peer_keepalive_timer is still armed, +leading to use-after-free. + +syzbot report was: + +ODEBUG: free active (active state 0) object type: timer_list hint: rxrpc_peer_keepalive_timeout+0x0/0xb0 +WARNING: CPU: 0 PID: 3660 at lib/debugobjects.c:505 debug_print_object+0x16e/0x250 lib/debugobjects.c:505 +Modules linked in: +CPU: 0 PID: 3660 Comm: kworker/u4:6 Not tainted 5.17.0-syzkaller-13993-g88e6c0207623 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: netns cleanup_net +RIP: 0010:debug_print_object+0x16e/0x250 lib/debugobjects.c:505 +Code: ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 af 00 00 00 48 8b 14 dd 00 1c 26 8a 4c 89 ee 48 c7 c7 00 10 26 8a e8 b1 e7 28 05 <0f> 0b 83 05 15 eb c5 09 01 48 83 c4 18 5b 5d 41 5c 41 5d 41 5e c3 +RSP: 0018:ffffc9000353fb00 EFLAGS: 00010082 +RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000 +RDX: ffff888029196140 RSI: ffffffff815efad8 RDI: fffff520006a7f52 +RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 +R10: ffffffff815ea4ae R11: 0000000000000000 R12: ffffffff89ce23e0 +R13: ffffffff8a2614e0 R14: ffffffff816628c0 R15: dffffc0000000000 +FS: 0000000000000000(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fe1f2908924 CR3: 0000000043720000 CR4: 00000000003506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 
00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + __debug_check_no_obj_freed lib/debugobjects.c:992 [inline] + debug_check_no_obj_freed+0x301/0x420 lib/debugobjects.c:1023 + kfree+0xd6/0x310 mm/slab.c:3809 + ops_free_list.part.0+0x119/0x370 net/core/net_namespace.c:176 + ops_free_list net/core/net_namespace.c:174 [inline] + cleanup_net+0x591/0xb00 net/core/net_namespace.c:598 + process_one_work+0x996/0x1610 kernel/workqueue.c:2289 + worker_thread+0x665/0x1080 kernel/workqueue.c:2436 + kthread+0x2e9/0x3a0 kernel/kthread.c:376 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 + + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. **程序漏洞分析**: + - **程序**:Linux内核 (Kernel) + - **漏洞发生原因**:在`rxrpc_exit_net()`函数中,存在一个竞争条件(race condition)。当`rxrpc_exit_net()`设置`rxnet->live = false`并尝试取消定时器和工作队列时,另一个线程可能仍然在访问或操作`peer_keepalive_timer`,导致定时器被错误地减少或触发,从而引发use-after-free问题。 + - **效果**:该漏洞可能导致系统崩溃或内存损坏,具体表现为内核警告(WARNING)或更严重的内核恐慌(kernel panic)。这会影响系统的稳定性,但不直接涉及容器或隔离机制。 + +3. **总结**:此CVE与容器、namespace、cgroup或隔离无关,仅影响Linux内核的稳定性。 + +cve: ./data/2022/49xxx/CVE-2022-49169.json +In the Linux kernel, the following vulnerability has been resolved: + +f2fs: use spin_lock to avoid hang + +[14696.634553] task:cat state:D stack: 0 pid:1613738 ppid:1613735 flags:0x00000004 +[14696.638285] Call Trace: +[14696.639038] +[14696.640032] __schedule+0x302/0x930 +[14696.640969] schedule+0x58/0xd0 +[14696.641799] schedule_preempt_disabled+0x18/0x30 +[14696.642890] __mutex_lock.constprop.0+0x2fb/0x4f0 +[14696.644035] ? mod_objcg_state+0x10c/0x310 +[14696.645040] ? obj_cgroup_charge+0xe1/0x170 +[14696.646067] __mutex_lock_slowpath+0x13/0x20 +[14696.647126] mutex_lock+0x34/0x40 +[14696.648070] stat_show+0x25/0x17c0 [f2fs] +[14696.649218] seq_read_iter+0x120/0x4b0 +[14696.650289] ? aa_file_perm+0x12a/0x500 +[14696.651357] ? 
lru_cache_add+0x1c/0x20 +[14696.652470] seq_read+0xfd/0x140 +[14696.653445] full_proxy_read+0x5c/0x80 +[14696.654535] vfs_read+0xa0/0x1a0 +[14696.655497] ksys_read+0x67/0xe0 +[14696.656502] __x64_sys_read+0x1a/0x20 +[14696.657580] do_syscall_64+0x3b/0xc0 +[14696.658671] entry_SYSCALL_64_after_hwframe+0x44/0xae +[14696.660068] RIP: 0033:0x7efe39df1cb2 +[14696.661133] RSP: 002b:00007ffc8badd948 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 +[14696.662958] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007efe39df1cb2 +[14696.664757] RDX: 0000000000020000 RSI: 00007efe399df000 RDI: 0000000000000003 +[14696.666542] RBP: 00007efe399df000 R08: 00007efe399de010 R09: 00007efe399de010 +[14696.668363] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000000000 +[14696.670155] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 +[14696.671965] +[14696.672826] task:umount state:D stack: 0 pid:1614985 ppid:1614984 flags:0x00004000 +[14696.674930] Call Trace: +[14696.675903] +[14696.676780] __schedule+0x302/0x930 +[14696.677927] schedule+0x58/0xd0 +[14696.679019] schedule_preempt_disabled+0x18/0x30 +[14696.680412] __mutex_lock.constprop.0+0x2fb/0x4f0 +[14696.681783] ? destroy_inode+0x65/0x80 +[14696.683006] __mutex_lock_slowpath+0x13/0x20 +[14696.684305] mutex_lock+0x34/0x40 +[14696.685442] f2fs_destroy_stats+0x1e/0x60 [f2fs] +[14696.686803] f2fs_put_super+0x158/0x390 [f2fs] +[14696.688238] generic_shutdown_super+0x7a/0x120 +[14696.689621] kill_block_super+0x27/0x50 +[14696.690894] kill_f2fs_super+0x7f/0x100 [f2fs] +[14696.692311] deactivate_locked_super+0x35/0xa0 +[14696.693698] deactivate_super+0x40/0x50 +[14696.694985] cleanup_mnt+0x139/0x190 +[14696.696209] __cleanup_mnt+0x12/0x20 +[14696.697390] task_work_run+0x64/0xa0 +[14696.698587] exit_to_user_mode_prepare+0x1b7/0x1c0 +[14696.700053] syscall_exit_to_user_mode+0x27/0x50 +[14696.701418] do_syscall_64+0x48/0xc0 +[14696.702630] entry_SYSCALL_64_after_hwframe+0x44/0xae + +analysis: 1. 
**是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. **这是什么程序的漏洞,如何发生,有何效果**: + - **程序**:Linux内核(Kernel),具体为F2FS文件系统模块。 + - **漏洞原因**:在F2FS文件系统的实现中,存在竞争条件问题,导致在某些情况下调用`mutex_lock`时可能发生死锁或系统挂起(hang)。具体来说,问题出现在对共享资源进行加锁操作时未正确处理并发访问。 + - **效果**:此漏洞可能导致系统在执行特定文件系统操作(如读取或卸载F2FS文件系统)时出现死锁,从而使受影响的任务处于不可中断的睡眠状态(D状态),进而影响系统的可用性。 + +cve: ./data/2022/49xxx/CVE-2022-49183.json +In the Linux kernel, the following vulnerability has been resolved: + +net/sched: act_ct: fix ref leak when switching zones + +When switching zones or network namespaces without doing a ct clear in +between, it is now leaking a reference to the old ct entry. That's +because tcf_ct_skb_nfct_cached() returns false and +tcf_ct_flow_table_lookup() may simply overwrite it. + +The fix is to, as the ct entry is not reusable, free it already at +tcf_ct_skb_nfct_cached(). + +analysis: 1. 这个CVE信息与namespace相关,因为它提到了网络命名空间(network namespaces)。 +2. 这是Linux内核的漏洞。该漏洞发生在`net/sched`子系统中的`act_ct`模块,在切换连接跟踪(connection tracking, ct)区域或网络命名空间时,如果没有在中间清除连接跟踪条目(ct clear),会导致旧的ct条目引用泄露。这是因为`tcf_ct_skb_nfct_cached()`返回false,并且`tcf_ct_flow_table_lookup()`可能会直接覆盖旧的引用。这种引用泄露可能导致资源耗尽或连接跟踪数据不一致,从而影响系统的稳定性和隔离性。 + +cve: ./data/2022/49xxx/CVE-2022-49266.json +In the Linux kernel, the following vulnerability has been resolved: + +block: fix rq-qos breakage from skipping rq_qos_done_bio() + +a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't +tracked") made bio_endio() skip rq_qos_done_bio() if BIO_TRACKED is not set. +While this fixed a potential oops, it also broke blk-iocost by skipping the +done_bio callback for merged bios. + +Before, whether a bio goes through rq_qos_throttle() or rq_qos_merge(), +rq_qos_done_bio() would be called on the bio on completion with BIO_TRACKED +distinguishing the former from the latter. rq_qos_done_bio() is not called +for bios which went through rq_qos_merge(). This royally confuses +blk-iocost as the merged bios never finish and are considered perpetually +in-flight. 
+ +One reliably reproducible failure mode is an intermediate cgroup geting +stuck active preventing its children from being activated due to the +leaf-only rule, leading to loss of control. The following is from +resctl-bench protection scenario which emulates isolating a web server like +workload from a memory bomb run on an iocost configuration which should +yield a reasonable level of protection. + + # cat /sys/block/nvme2n1/device/model + Samsung SSD 970 PRO 512GB + # cat /sys/fs/cgroup/io.cost.model + 259:0 ctrl=user model=linear rbps=834913556 rseqiops=93622 rrandiops=102913 wbps=618985353 wseqiops=72325 wrandiops=71025 + # cat /sys/fs/cgroup/io.cost.qos + 259:0 enable=1 ctrl=user rpct=95.00 rlat=18776 wpct=95.00 wlat=8897 min=60.00 max=100.00 + # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1 + ... + Memory Hog Summary + ================== + + IO Latency: R p50=242u:336u/2.5m p90=794u:1.4m/7.5m p99=2.7m:8.0m/62.5m max=8.0m:36.4m/350m + W p50=221u:323u/1.5m p90=709u:1.2m/5.5m p99=1.5m:2.5m/9.5m max=6.9m:35.9m/350m + + Isolation and Request Latency Impact Distributions: + + min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev + isol% 15.90 15.90 15.90 40.05 57.24 59.07 60.01 74.63 74.63 90.35 90.35 58.12 15.82 + lat-imp% 0 0 0 0 0 4.55 14.68 15.54 233.5 548.1 548.1 53.88 143.6 + + Result: isol=58.12:15.82% lat_imp=53.88%:143.6 work_csv=100.0% missing=3.96% + +The isolation result of 58.12% is close to what this device would show +without any IO control. + +Fix it by introducing a new flag BIO_QOS_MERGED to mark merged bios and +calling rq_qos_done_bio() on them too. For consistency and clarity, rename +BIO_TRACKED to BIO_QOS_THROTTLED. The flag checks are moved into +rq_qos_done_bio() so that it's next to the code paths that set the flags. + +With the patch applied, the above same benchmark shows: + + # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1 + ... 
+ Memory Hog Summary + ================== + + IO Latency: R p50=123u:84.4u/985u p90=322u:256u/2.5m p99=1.6m:1.4m/9.5m max=11.1m:36.0m/350m + W p50=429u:274u/995u p90=1.7m:1.3m/4.5m p99=3.4m:2.7m/11.5m max=7.9m:5.9m/26.5m + + Isolation and Request Latency Impact Distributions: + + min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev + isol% 84.91 84.91 89.51 90.73 92.31 94.49 96.36 98.04 98.71 100.0 100.0 94.42 2.81 + lat-imp% 0 0 0 0 0 2.81 5.73 11.11 13.92 17.53 22.61 4.10 4.68 + + Result: isol=94.42:2.81% lat_imp=4.10%:4.68 work_csv=58.34% missing=0% + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个CVE与`cgroup`和隔离相关。问题涉及`blk-iocost`功能的实现,该功能是Linux内核中用于基于IO成本模型进行资源控制的一部分,而这种资源控制通常与`cgroup`(控制组)相关联。此外,问题描述中提到一个中间`cgroup`被卡住无法激活,导致其子级也无法激活,这直接影响了资源隔离的效果。 + +2. **这是什么程序的漏洞?如何发生?有何效果?**: + - 这是一个**Linux内核**中的漏洞。 + - 漏洞发生在块设备请求队列的质量服务(QoS)处理逻辑中。具体来说,`bio_endio()`函数在某些情况下跳过了对未标记为`BIO_TRACKED`的I/O请求调用`rq_qos_done_bio()`,这导致合并的I/O请求没有正确完成其生命周期,进而混淆了`blk-iocost`机制。 + - 效果是`blk-iocost`无法正确跟踪合并的I/O请求,使得某些`cgroup`的状态变得不一致,例如中间`cgroup`被卡住无法释放,从而影响其子级的激活。这种行为会导致资源隔离失效,尤其是在需要精确控制不同工作负载之间IO性能的情况下(如隔离内存炸弹和Web服务器类的工作负载)。最终结果是失去对IO资源的控制,可能导致性能下降或不公平的资源分配。 + +cve: ./data/2022/49xxx/CVE-2022-49394.json +In the Linux kernel, the following vulnerability has been resolved: + +blk-iolatency: Fix inflight count imbalances and IO hangs on offline + +iolatency needs to track the number of inflight IOs per cgroup. As this +tracking can be expensive, it is disabled when no cgroup has iolatency +configured for the device. To ensure that the inflight counters stay +balanced, iolatency_set_limit() freezes the request_queue while manipulating +the enabled counter, which ensures that no IO is in flight and thus all +counters are zero. + +Unfortunately, iolatency_set_limit() isn't the only place where the enabled +counter is manipulated. iolatency_pd_offline() can also dec the counter and +trigger disabling. 
As this disabling happens without freezing the q, this +can easily happen while some IOs are in flight and thus leak the counts. + +This can be easily demonstrated by turning on iolatency on an one empty +cgroup while IOs are in flight in other cgroups and then removing the +cgroup. Note that iolatency shouldn't have been enabled elsewhere in the +system to ensure that removing the cgroup disables iolatency for the whole +device. + +The following keeps flipping on and off iolatency on sda: + + echo +io > /sys/fs/cgroup/cgroup.subtree_control + while true; do + mkdir -p /sys/fs/cgroup/test + echo '8:0 target=100000' > /sys/fs/cgroup/test/io.latency + sleep 1 + rmdir /sys/fs/cgroup/test + sleep 1 + done + +and there's concurrent fio generating direct rand reads: + + fio --name test --filename=/dev/sda --direct=1 --rw=randread \ + --runtime=600 --time_based --iodepth=256 --numjobs=4 --bs=4k + +while monitoring with the following drgn script: + + while True: + for css in css_for_each_descendant_pre(prog['blkcg_root'].css.address_of_()): + for pos in hlist_for_each(container_of(css, 'struct blkcg', 'css').blkg_list): + blkg = container_of(pos, 'struct blkcg_gq', 'blkcg_node') + pd = blkg.pd[prog['blkcg_policy_iolatency'].plid] + if pd.value_() == 0: + continue + iolat = container_of(pd, 'struct iolatency_grp', 'pd') + inflight = iolat.rq_wait.inflight.counter.value_() + if inflight: + print(f'inflight={inflight} {disk_name(blkg.q.disk).decode("utf-8")} ' + f'{cgroup_path(css.cgroup).decode("utf-8")}') + time.sleep(1) + +The monitoring output looks like the following: + + inflight=1 sda /user.slice + inflight=1 sda /user.slice + ... 
+ inflight=14 sda /user.slice + inflight=13 sda /user.slice + inflight=17 sda /user.slice + inflight=15 sda /user.slice + inflight=18 sda /user.slice + inflight=17 sda /user.slice + inflight=20 sda /user.slice + inflight=19 sda /user.slice <- fio stopped, inflight stuck at 19 + inflight=19 sda /user.slice + inflight=19 sda /user.slice + +If a cgroup with stuck inflight ends up getting throttled, the throttled IOs +will never get issued as there's no completion event to wake it up leading +to an indefinite hang. + +This patch fixes the bug by unifying enable handling into a work item which +is automatically kicked off from iolatency_set_min_lat_nsec() which is +called from both iolatency_set_limit() and iolatency_pd_offline() paths. +Punting to a work item is necessary as iolatency_pd_offline() is called +under spinlocks while freezing a request_queue requires a sleepable context. + +This also simplifies the code reducing LOC sans the comments and avoids the +unnecessary freezes which were happening whenever a cgroup's latency target +is newly set or cleared. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,该CVE与`cgroup`(控制组)直接相关。问题涉及`iolatency`在`cgroup`中的实现,特别是在`cgroup`被删除或配置更改时,导致`inflight`计数不平衡和IO挂起的问题。 + +2. **这是什么程序的漏洞:** + - **程序**: Linux内核 (Kernel) + - **漏洞发生原因**: + 在Linux内核中,`iolatency`功能需要跟踪每个`cgroup`的飞行IO (`inflight IO`) 数量。为了优化性能,当没有`cgroup`为设备配置`iolatency`时,这种跟踪会被禁用。然而,在调整`enabled counter`的过程中,存在多个路径可以修改该计数器(如`iolatency_set_limit()`和`iolatency_pd_offline()`)。如果在`cgroup`被删除或重新配置时,未正确冻结`request_queue`,可能会导致`inflight`计数泄漏,进而引发IO挂起问题。 + - **效果**: + 当某个`cgroup`的`inflight`计数卡住时,如果该`cgroup`受到节流限制,则受影响的IO将永远不会被触发,导致系统出现无限期挂起的情况。这会影响依赖于IO操作的容器或应用程序,尤其是在使用`cgroup`进行资源隔离和管理的场景下。 + +3. 
**总结**: + 该漏洞发生在Linux内核中,与`cgroup`的`iolatency`机制相关。它可能导致IO计数不平衡和系统挂起,尤其在动态调整`cgroup`配置或删除`cgroup`时出现问题。 + +cve: ./data/2022/49xxx/CVE-2022-49411.json +In the Linux kernel, the following vulnerability has been resolved: + +bfq: Make sure bfqg for which we are queueing requests is online + +Bios queued into BFQ IO scheduler can be associated with a cgroup that +was already offlined. This may then cause insertion of this bfq_group +into a service tree. But this bfq_group will get freed as soon as last +bio associated with it is completed leading to use after free issues for +service tree users. Fix the problem by making sure we always operate on +online bfq_group. If the bfq_group associated with the bio is not +online, we pick the first online parent. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,该CVE与cgroup相关。描述中明确提到问题涉及`bfq_group`和`cgroup`,并且当一个`cgroup`被下线后,仍然可能有请求与其关联,从而导致潜在的使用后释放(use-after-free)问题。 + +2. **程序漏洞分析:** + - **程序类型:** 这是Linux内核(Kernel)的漏洞。 + - **漏洞发生原因:** 在BFQ(Budget Fair Queuing)I/O调度器中,排队的bio(块I/O请求)可能会与已经被下线的`cgroup`关联。这可能导致对应的`bfq_group`被插入到服务树(service tree)中,但该`bfq_group`会在最后一个与其关联的bio完成时被释放。如果其他部分代码仍然尝试访问已被释放的`bfq_group`,就会引发使用后释放(use-after-free)问题。 + - **漏洞效果:** 该漏洞可能导致内核崩溃或不稳定,攻击者可能利用此漏洞执行任意代码或导致系统拒绝服务(DoS)。 + - **修复措施:** 修复方案确保始终操作在线的`bfq_group`。如果与bio关联的`bfq_group`不在线,则选择第一个在线的父级`bfq_group`。 + +cve: ./data/2022/49xxx/CVE-2022-49412.json +In the Linux kernel, the following vulnerability has been resolved: + +bfq: Avoid merging queues with different parents + +It can happen that the parent of a bfqq changes between the moment we +decide two queues are worth to merge (and set bic->stable_merge_bfqq) +and the moment bfq_setup_merge() is called. This can happen e.g. because +the process submitted IO for a different cgroup and thus bfqq got +reparented. 
It can even happen that the bfqq we are merging with has +parent cgroup that is already offline and going to be destroyed in which +case the merge can lead to use-after-free issues such as: + +BUG: KASAN: use-after-free in __bfq_deactivate_entity+0x9cb/0xa50 +Read of size 8 at addr ffff88800693c0c0 by task runc:[2:INIT]/10544 + +CPU: 0 PID: 10544 Comm: runc:[2:INIT] Tainted: G E 5.15.2-0.g5fb85fd-default #1 openSUSE Tumbleweed (unreleased) f1f3b891c72369aebecd2e43e4641a6358867c70 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014 +Call Trace: + + dump_stack_lvl+0x46/0x5a + print_address_description.constprop.0+0x1f/0x140 + ? __bfq_deactivate_entity+0x9cb/0xa50 + kasan_report.cold+0x7f/0x11b + ? __bfq_deactivate_entity+0x9cb/0xa50 + __bfq_deactivate_entity+0x9cb/0xa50 + ? update_curr+0x32f/0x5d0 + bfq_deactivate_entity+0xa0/0x1d0 + bfq_del_bfqq_busy+0x28a/0x420 + ? resched_curr+0x116/0x1d0 + ? bfq_requeue_bfqq+0x70/0x70 + ? check_preempt_wakeup+0x52b/0xbc0 + __bfq_bfqq_expire+0x1a2/0x270 + bfq_bfqq_expire+0xd16/0x2160 + ? try_to_wake_up+0x4ee/0x1260 + ? bfq_end_wr_async_queues+0xe0/0xe0 + ? _raw_write_unlock_bh+0x60/0x60 + ? _raw_spin_lock_irq+0x81/0xe0 + bfq_idle_slice_timer+0x109/0x280 + ? bfq_dispatch_request+0x4870/0x4870 + __hrtimer_run_queues+0x37d/0x700 + ? enqueue_hrtimer+0x1b0/0x1b0 + ? kvm_clock_get_cycles+0xd/0x10 + ? ktime_get_update_offsets_now+0x6f/0x280 + hrtimer_interrupt+0x2c8/0x740 + +Fix the problem by checking that the parent of the two bfqqs we are +merging in bfq_setup_merge() is the same. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,这个CVE与cgroup相关。漏洞描述中提到,由于进程为不同的cgroup提交了I/O请求,导致bfqq(block fair queuing queue)被重新分配父级(reparented),从而引发潜在的use-after-free问题。 + +2. 
**这是什么程序的漏洞:** + - 这是Linux内核(Kernel)中的一个漏洞。 + - 漏洞发生在块I/O调度器BFQ(Block Fair Queueing)模块中,具体是在队列合并逻辑部分。 + - **漏洞发生原因:** 在决定两个队列值得合并后,但在实际调用`bfq_setup_merge()`进行合并之前,如果其中一个队列的父级cgroup发生了变化(例如,进程为另一个cgroup提交了I/O请求),可能会导致合并操作涉及一个已经离线并即将被销毁的cgroup。这种情况下,可能会触发use-after-free问题。 + - **效果:** 该漏洞可能导致内核崩溃(如描述中的KASAN检测到的use-after-free错误),进而影响系统的稳定性和安全性。在容器环境中,这可能被利用来破坏隔离性或导致主机系统不稳定。 + +cve: ./data/2022/49xxx/CVE-2022-49413.json +In the Linux kernel, the following vulnerability has been resolved: + +bfq: Update cgroup information before merging bio + +When the process is migrated to a different cgroup (or in case of +writeback just starts submitting bios associated with a different +cgroup) bfq_merge_bio() can operate with stale cgroup information in +bic. Thus the bio can be merged to a request from a different cgroup or +it can result in merging of bfqqs for different cgroups or bfqqs of +already dead cgroups and causing possible use-after-free issues. Fix the +problem by updating cgroup information in bfq_merge_bio(). + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 cgroup 直接相关。描述中明确提到在进程迁移到不同 cgroup 或写回时提交与不同 cgroup 关联的 bio,可能导致合并 bio 时使用陈旧的 cgroup 信息。 + +2. **这是什么程序的漏洞**: + 这是 **Linux 内核 (Kernel)** 的漏洞。具体来说,问题发生在块 I/O 调度器 BFQ (Budget Fair Queuing) 中。漏洞的原因是当进程迁移到不同的 cgroup 或提交与不同 cgroup 关联的 bio 时,`bfq_merge_bio()` 函数可能仍然使用陈旧的 cgroup 信息,从而导致以下问题: + - bio 被错误地合并到属于不同 cgroup 的请求中。 + - 不同 cgroup 的队列 (bfqq) 被错误地合并。 + - 已经被销毁的 cgroup 的队列被使用,可能导致 use-after-free 问题。 + +3. 
**漏洞效果**: + 该漏洞可能导致以下后果: + - 数据完整性受损:bio 被错误地合并到错误的 cgroup 请求中,可能影响块设备上的数据写入顺序或优先级。 + - 系统崩溃或不稳定:由于 use-after-free 问题,可能会导致内核崩溃或系统不稳定。 + - 隔离性破坏:不同 cgroup 的资源使用情况可能被混淆,从而削弱 cgroup 提供的资源隔离能力。这可能间接影响容器环境中的隔离性,尤其是在使用 cgroup 来管理容器资源的情况下。 + +cve: ./data/2022/49xxx/CVE-2022-49567.json +In the Linux kernel, the following vulnerability has been resolved: + +mm/mempolicy: fix uninit-value in mpol_rebind_policy() + +mpol_set_nodemask()(mm/mempolicy.c) does not set up nodemask when +pol->mode is MPOL_LOCAL. Check pol->mode before access +pol->w.cpuset_mems_allowed in mpol_rebind_policy()(mm/mempolicy.c). + +BUG: KMSAN: uninit-value in mpol_rebind_policy mm/mempolicy.c:352 [inline] +BUG: KMSAN: uninit-value in mpol_rebind_task+0x2ac/0x2c0 mm/mempolicy.c:368 + mpol_rebind_policy mm/mempolicy.c:352 [inline] + mpol_rebind_task+0x2ac/0x2c0 mm/mempolicy.c:368 + cpuset_change_task_nodemask kernel/cgroup/cpuset.c:1711 [inline] + cpuset_attach+0x787/0x15e0 kernel/cgroup/cpuset.c:2278 + cgroup_migrate_execute+0x1023/0x1d20 kernel/cgroup/cgroup.c:2515 + cgroup_migrate kernel/cgroup/cgroup.c:2771 [inline] + cgroup_attach_task+0x540/0x8b0 kernel/cgroup/cgroup.c:2804 + __cgroup1_procs_write+0x5cc/0x7a0 kernel/cgroup/cgroup-v1.c:520 + cgroup1_tasks_write+0x94/0xb0 kernel/cgroup/cgroup-v1.c:539 + cgroup_file_write+0x4c2/0x9e0 kernel/cgroup/cgroup.c:3852 + kernfs_fop_write_iter+0x66a/0x9f0 fs/kernfs/file.c:296 + call_write_iter include/linux/fs.h:2162 [inline] + new_sync_write fs/read_write.c:503 [inline] + vfs_write+0x1318/0x2030 fs/read_write.c:590 + ksys_write+0x28b/0x510 fs/read_write.c:643 + __do_sys_write fs/read_write.c:655 [inline] + __se_sys_write fs/read_write.c:652 [inline] + __x64_sys_write+0xdb/0x120 fs/read_write.c:652 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +Uninit was created at: + slab_post_alloc_hook mm/slab.h:524 [inline] + slab_alloc_node mm/slub.c:3251 [inline] + 
slab_alloc mm/slub.c:3259 [inline] + kmem_cache_alloc+0x902/0x11c0 mm/slub.c:3264 + mpol_new mm/mempolicy.c:293 [inline] + do_set_mempolicy+0x421/0xb70 mm/mempolicy.c:853 + kernel_set_mempolicy mm/mempolicy.c:1504 [inline] + __do_sys_set_mempolicy mm/mempolicy.c:1510 [inline] + __se_sys_set_mempolicy+0x44c/0xb60 mm/mempolicy.c:1507 + __x64_sys_set_mempolicy+0xd8/0x110 mm/mempolicy.c:1507 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +KMSAN: uninit-value in mpol_rebind_task (2) +https://syzkaller.appspot.com/bug?id=d6eb90f952c2a5de9ea718a1b873c55cb13b59dc + +This patch seems to fix below bug too. +KMSAN: uninit-value in mpol_rebind_mm (2) +https://syzkaller.appspot.com/bug?id=f2fecd0d7013f54ec4162f60743a2b28df40926b + +The uninit-value is pol->w.cpuset_mems_allowed in mpol_rebind_policy(). +When syzkaller reproducer runs to the beginning of mpol_new(), + + mpol_new() mm/mempolicy.c + do_mbind() mm/mempolicy.c + kernel_mbind() mm/mempolicy.c + +`mode` is 1(MPOL_PREFERRED), nodes_empty(*nodes) is `true` and `flags` +is 0. Then + + mode = MPOL_LOCAL; + ... + policy->mode = mode; + policy->flags = flags; + +will be executed. So in mpol_set_nodemask(), + + mpol_set_nodemask() mm/mempolicy.c + do_mbind() + kernel_mbind() + +pol->mode is 4 (MPOL_LOCAL), that `nodemask` in `pol` is not initialized, +which will be accessed in mpol_rebind_policy(). + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,这个 CVE 与 cgroup 和隔离机制相关。从描述中可以看到,问题涉及 `cpuset` 子系统(属于 cgroup 的一部分),并且在 `cpuset_attach` 和 `cgroup_migrate_execute` 函数中有调用栈。这表明该漏洞可能会影响基于 cgroup 实现的资源隔离和管理功能。 + +2. 
**这是什么程序的漏洞**: + 这是 Linux 内核的漏洞,具体发生在内存策略(memory policy)相关的代码路径中 (`mm/mempolicy.c`)。 + - **漏洞发生原因**:当内存策略模式为 `MPOL_LOCAL` 时,`mpol_set_nodemask()` 函数未正确初始化 `nodemask`,导致在 `mpol_rebind_policy()` 中访问未初始化的值 (`pol->w.cpuset_mems_allowed`)。 + - **效果**:此漏洞可能导致内核崩溃或数据损坏,因为未初始化的值被使用。此外,由于调用栈涉及 cgroup 的迁移逻辑,这可能影响基于 cgroup 的容器隔离行为,例如 CPU 或内存分配策略的设置。 + +总结:这是一个 Linux 内核中的漏洞,与 cgroup 和隔离机制相关,可能影响容器的资源分配和隔离功能。 + +cve: ./data/2022/49xxx/CVE-2022-49647.json +In the Linux kernel, the following vulnerability has been resolved: + +cgroup: Use separate src/dst nodes when preloading css_sets for migration + +Each cset (css_set) is pinned by its tasks. When we're moving tasks around +across csets for a migration, we need to hold the source and destination +csets to ensure that they don't go away while we're moving tasks about. This +is done by linking cset->mg_preload_node on either the +mgctx->preloaded_src_csets or mgctx->preloaded_dst_csets list. Using the +same cset->mg_preload_node for both the src and dst lists was deemed okay as +a cset can't be both the source and destination at the same time. + +Unfortunately, this overloading becomes problematic when multiple tasks are +involved in a migration and some of them are identity noop migrations while +others are actually moving across cgroups. For example, this can happen with +the following sequence on cgroup1: + + #1> mkdir -p /sys/fs/cgroup/misc/a/b + #2> echo $$ > /sys/fs/cgroup/misc/a/cgroup.procs + #3> RUN_A_COMMAND_WHICH_CREATES_MULTIPLE_THREADS & + #4> PID=$! + #5> echo $PID > /sys/fs/cgroup/misc/a/b/tasks + #6> echo $PID > /sys/fs/cgroup/misc/a/cgroup.procs + +the process including the group leader back into a. In this final migration, +non-leader threads would be doing identity migration while the group leader +is doing an actual one. + +After #3, let's say the whole process was in cset A, and that after #4, the +leader moves to cset B. Then, during #6, the following happens: + + 1. 
cgroup_migrate_add_src() is called on B for the leader. + + 2. cgroup_migrate_add_src() is called on A for the other threads. + + 3. cgroup_migrate_prepare_dst() is called. It scans the src list. + + 4. It notices that B wants to migrate to A, so it tries to A to the dst + list but realizes that its ->mg_preload_node is already busy. + + 5. and then it notices A wants to migrate to A as it's an identity + migration, it culls it by list_del_init()'ing its ->mg_preload_node and + putting references accordingly. + + 6. The rest of migration takes place with B on the src list but nothing on + the dst list. + +This means that A isn't held while migration is in progress. If all tasks +leave A before the migration finishes and the incoming task pins it, the +cset will be destroyed leading to use-after-free. + +This is caused by overloading cset->mg_preload_node for both src and dst +preload lists. We wanted to exclude the cset from the src list but ended up +inadvertently excluding it from the dst list too. + +This patch fixes the issue by separating out cset->mg_preload_node into +->mg_src_preload_node and ->mg_dst_preload_node, so that the src and dst +preloadings don't interfere with each other. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个CVE与cgroup(控制组)直接相关。cgroup是Linux内核中的一个功能模块,用于限制、记录和隔离进程组的资源(如CPU、内存、磁盘I/O等)。此漏洞涉及cgroup迁移过程中css_set的处理问题,因此与隔离机制密切相关。 + +2. 
**程序漏洞分析**: + - **程序类型**:这是Linux内核(Kernel)中的漏洞。 + - **漏洞发生原因**:在cgroup任务迁移的过程中,内核使用了同一个`cset->mg_preload_node`来表示源(src)和目标(dst)节点。这种设计在某些复杂场景下会导致冲突,例如当多个线程同时进行迁移时,部分线程执行的是“身份迁移”(即不改变cgroup),而另一部分线程实际改变了cgroup。这种情况下,源和目标列表的操作可能会相互干扰,导致目标列表中的节点被意外删除。 + - **漏洞效果**:由于目标列表中的节点被错误地移除,可能导致在迁移过程中无法正确持有相关的css_set对象。如果所有任务在迁移完成前离开某个css_set,而此时有新任务试图引用该css_set,就会触发use-after-free漏洞。这可能引发内核崩溃或被攻击者利用以提升权限或执行恶意代码。 + +总结:这是一个与cgroup任务迁移相关的Linux内核漏洞,可能导致use-after-free问题,影响系统的稳定性和安全性。 + +cve: ./data/2022/49xxx/CVE-2022-49696.json +In the Linux kernel, the following vulnerability has been resolved: + +tipc: fix use-after-free Read in tipc_named_reinit + +syzbot found the following issue on: +================================================================== +BUG: KASAN: use-after-free in tipc_named_reinit+0x94f/0x9b0 +net/tipc/name_distr.c:413 +Read of size 8 at addr ffff88805299a000 by task kworker/1:9/23764 + +CPU: 1 PID: 23764 Comm: kworker/1:9 Not tainted +5.18.0-rc4-syzkaller-00878-g17d49e6e8012 #0 +Hardware name: Google Compute Engine/Google Compute Engine, +BIOS Google 01/01/2011 +Workqueue: events tipc_net_finalize_work +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 + print_address_description.constprop.0.cold+0xeb/0x495 +mm/kasan/report.c:313 + print_report mm/kasan/report.c:429 [inline] + kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 + tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413 + tipc_net_finalize+0x234/0x3d0 net/tipc/net.c:138 + process_one_work+0x996/0x1610 kernel/workqueue.c:2289 + worker_thread+0x665/0x1080 kernel/workqueue.c:2436 + kthread+0x2e9/0x3a0 kernel/kthread.c:376 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 + +[...] 
+================================================================== + +In the commit +d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work"), +the cancel_work_sync() function just to make sure ONLY the work +tipc_net_finalize_work() is executing/pending on any CPU completed before +tipc namespace is destroyed through tipc_exit_net(). But this function +is not guaranteed the work is the last queued. So, the destroyed instance +may be accessed in the work which will try to enqueue later. + +In order to completely fix, we re-order the calling of cancel_work_sync() +to make sure the work tipc_net_finalize_work() was last queued and it +must be completed by calling cancel_work_sync(). + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + N/A + +2. **程序漏洞分析**: + - **程序类型**:Linux 内核 (Kernel) + - **漏洞发生位置**:`tipc_named_reinit` 函数中存在 use-after-free 问题。 + - **漏洞原因**:在 `tipc_exit_net()` 销毁 TIPC(Transparent Inter-Process Communication)命名空间时,`cancel_work_sync()` 函数未能确保 `tipc_net_finalize_work()` 是最后一个被排队的工作项。因此,在销毁 TIPC 命名空间后,可能仍然存在对已释放实例的访问,从而导致 use-after-free 漏洞。 + - **漏洞效果**:攻击者可能利用此漏洞导致系统崩溃(内核崩溃)或潜在的信息泄露,具体取决于 use-after-free 的上下文和触发方式。 + +3. **结论**:N/A + +cve: ./data/2023/0xxx/CVE-2023-0626.json +Docker Desktop before 4.12.0 is vulnerable to RCE via query parameters in message-box route. + +This issue affects Docker Desktop: before 4.12.0. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是Docker Desktop程序的漏洞。漏洞发生在Docker Desktop的message-box路由中,攻击者可以通过构造恶意查询参数触发远程代码执行(RCE)。此漏洞的效果是允许攻击者在受影响的Docker Desktop实例上执行任意代码,可能导致系统被完全攻陷。 + +cve: ./data/2023/0xxx/CVE-2023-0627.json +Docker Desktop 4.11.x allows --no-windows-containers flag bypass via IPC response spoofing which may lead to Local Privilege Escalation (LPE).This issue affects Docker Desktop: 4.11.X. + +analysis: 1. 该 CVE 信息与 namespace、cgroup、container 或者容器、隔离相关。 + +2. 
这是 Docker Desktop 的漏洞。 + - 漏洞发生的原因:Docker Desktop 4.11.x 版本中存在一个通过 IPC(Inter-Process Communication)响应伪造绕过 `--no-windows-containers` 标志的问题。 + - 效果:攻击者可以通过伪造 IPC 响应,绕过容器运行模式的限制,从而可能导致本地权限提升(LPE)。这表明在 Windows 系统上,攻击者可能利用此漏洞突破容器的隔离机制,获取更高的系统权限。 + +cve: ./data/2023/0xxx/CVE-2023-0628.json +Docker Desktop before 4.17.0 allows an attacker to execute an arbitrary command inside a Dev Environments container during initialization by tricking a user to open a crafted malicious docker-desktop:// URL. + +analysis: 1. 该 CVE 信息与 container 相关。 + +2. 这是 Docker Desktop 的漏洞。 + - 漏洞发生的原因:Docker Desktop 在处理 `docker-desktop://` URL 时,未对输入进行充分验证,导致攻击者可以通过诱导用户打开恶意构造的 URL,在 Dev Environments 容器初始化期间执行任意命令。 + - 效果:攻击者可以利用此漏洞在容器中执行任意代码,可能进一步影响主机或其他容器,破坏隔离性并危及系统安全。 + +程序类型:容器实现(Docker Desktop)。 + +cve: ./data/2023/0xxx/CVE-2023-0629.json +Docker Desktop before 4.17.0 allows an unprivileged user to bypass Enhanced Container Isolation (ECI) restrictions by setting the Docker host to docker.raw.sock, or npipe:////.pipe/docker_engine_linux on Windows, via the -H (--host) CLI flag or the DOCKER_HOST environment variable and launch containers without the additional hardening features provided by ECI. This would not affect already running containers, nor containers launched through the usual approach (without Docker's raw socket). + +The affected functionality is available for Docker Business customers only and assumes an environment where users are not granted local root or Administrator privileges. +This issue has been fixed in Docker Desktop 4.17.0. + +Affected Docker Desktop versions: from 4.13.0 before 4.17.0. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与容器隔离相关。它涉及 Docker Desktop 的 Enhanced Container Isolation (ECI) 功能,该功能旨在增强容器的隔离性。通过绕过 ECI 限制,攻击者可以启动缺乏额外加固措施的容器。 + +2. 
**程序漏洞信息**: + - **程序**:Docker Desktop + - **漏洞原因**:Docker Desktop 允许非特权用户通过设置 `docker.raw.sock` 或 `npipe:////.pipe/docker_engine_linux`(在 Windows 上)作为 Docker 主机地址(通过 `-H` CLI 标志或 `DOCKER_HOST` 环境变量),从而绕过 Enhanced Container Isolation (ECI) 的限制。 + - **效果**:攻击者可以启动没有 ECI 提供的额外加固功能的容器。这降低了容器的安全性和隔离性,可能导致容器逃逸或其他安全风险。已经运行的容器或通过常规方式启动的容器不受影响。 + +3. **总结**: + 该漏洞与容器隔离密切相关,影响 Docker Desktop 的 Enhanced Container Isolation 功能,允许非特权用户绕过隔离限制以启动安全性较低的容器。 + +cve: ./data/2023/0xxx/CVE-2023-0923.json +A flaw was found in the Kubernetes service for notebooks in RHODS, where it does not prevent pods from other namespaces and applications from making requests to the Jupyter API. This flaw can lead to file content exposure and other issues. + +analysis: 1. 该CVE信息与namespace相关。 +2. 这是Kubernetes服务的漏洞。漏洞发生在Kubernetes服务未能正确限制其他命名空间中的Pod对Jupyter API的访问,导致跨命名空间的未经授权访问。效果是攻击者可能利用此漏洞访问敏感文件内容,造成数据泄露和其他潜在问题。 + +cve: ./data/2023/1xxx/CVE-2023-1174.json +This vulnerability exposes a network port in minikube running on macOS with Docker driver that could enable unexpected remote access to the minikube container. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**:是的,此 CVE 与容器和隔离相关。 + +2. **程序漏洞分析**: + - 这是 **minikube** 的漏洞,而 minikube 使用了 Docker 作为其容器运行时。 + - 漏洞发生的原因是在 macOS 上使用 Docker 驱动运行 minikube 时,某个网络端口被意外暴露。 + - 效果:该漏洞可能导致 minikube 容器被未经授权地远程访问,从而破坏容器的隔离性,攻击者可能利用此访问权限进一步危害系统或获取敏感信息。 + +cve: ./data/2023/1xxx/CVE-2023-1555.json +An issue has been discovered in GitLab affecting all versions starting from 15.2 before 16.1.5, all versions starting from 16.2 before 16.2.5, all versions starting from 16.3 before 16.3.1. A namespace-level banned user can access the API. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是GitLab程序的漏洞。漏洞发生的原因是GitLab在处理namespace级别的被禁用户时,没有正确地限制其对API的访问。效果是被禁用的用户仍然能够通过API访问和操作资源,从而绕过安全限制。 + +cve: ./data/2023/1xxx/CVE-2023-1636.json +A vulnerability was found in OpenStack Barbican containers. 
This vulnerability is only applicable to deployments that utilize an all-in-one configuration. Barbican containers share the same CGROUP, USER, and NET namespace with the host system and other OpenStack services. If any service is compromised, it could gain access to the data transmitted to and from Barbican. + +analysis: 1. 这个CVE信息与namespace、cgroup、container以及隔离相关。 + +2. 这是OpenStack Barbican容器的漏洞。该漏洞发生的原因是Barbican容器在all-in-one配置下,与主机系统和其他OpenStack服务共享了同一个CGROUP、USER和NET namespace。这种共享破坏了容器之间的隔离性。如果任何服务被攻破,攻击者可能获取与Barbican之间往来传输的数据,从而导致数据泄露或进一步的攻击。此漏洞并非内核Kernel或Docker容器实现本身的漏洞,而是由于容器配置不当引起的隔离性问题。 + +cve: ./data/2023/20xxx/CVE-2023-20235.json +A vulnerability in the on-device application development workflow feature for the Cisco IOx application hosting infrastructure in Cisco IOS XE Software could allow an authenticated, remote attacker to access the underlying operating system as the root user. + + This vulnerability exists because Docker containers with the privileged runtime option are not blocked when they are in application development mode. An attacker could exploit this vulnerability by using the Docker CLI to access an affected device. The application development workflow is meant to be used only on development systems and not in production systems. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,此 CVE 与容器(Docker)以及隔离机制相关。 + +2. **程序漏洞分析**: + - 这是 **Cisco IOS XE Software** 中 **Cisco IOx 应用托管基础设施** 的漏洞。 + - 漏洞发生的原因是:当 Docker 容器以 privileged 模式运行且处于应用开发模式时,系统未能正确阻止这些容器。这破坏了容器的隔离性,使得攻击者可以利用 Docker CLI 访问底层操作系统,并以 root 用户身份获得访问权限。 + - 效果:攻击者可以通过此漏洞绕过容器的隔离机制,直接访问并控制底层操作系统,导致系统被完全攻陷。 + +cve: ./data/2023/23xxx/CVE-2023-23468.json +IBM Robotic Process Automation for Cloud Pak 21.0.1 through 21.0.7.3 and 23.0.0 through 23.0.3 is vulnerable to insufficient security configuration which may allow creation of namespaces within a cluster. IBM X-Force ID: 244500. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,此 CVE 信息与 namespace 相关。 + +2. 
**程序漏洞分析**: + - 这是 **IBM Robotic Process Automation for Cloud Pak** 的漏洞。 + - 漏洞发生的原因是由于安全配置不足,导致可能在集群中创建 namespaces。 + - 效果:攻击者可能利用此漏洞在集群中创建额外的 namespaces,从而破坏隔离性,可能进一步影响集群的安全性和稳定性。 + +cve: ./data/2023/23xxx/CVE-2023-23586.json +Due to a vulnerability in the io_uring subsystem, it is possible to leak kernel memory information to the user process. timens_install calls current_is_single_threaded to determine if the current process is single-threaded, but this call does not consider io_uring's io_worker threads, thus it is possible to insert a time namespace's vvar page to process's memory space via a page fault. When this time namespace is destroyed, the vvar page is also freed, but not removed from the process' memory, and a next page allocated by the kernel will be still available from the user-space process and can leak memory contents via this (read-only) use-after-free vulnerability. We recommend upgrading past version 5.10.161 or commit  788d0824269bef539fe31a785b1517882eafed93 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/io_uring + +analysis: 1. **分析是否与 namespace、cgroup、container 或容器、隔离相关** + 是的,这个 CVE 与 namespace 相关。具体来说,它涉及 time namespace(时间命名空间),这是 Linux 内核中的一种 namespace 类型,用于提供时间相关的隔离。 + +2. **漏洞所属程序及影响分析** + - **程序**:这是一个 Linux 内核(Kernel)的漏洞,具体发生在 io_uring 子系统中。 + - **漏洞发生原因**: + 在处理 time namespace 的过程中,`timens_install` 函数调用了 `current_is_single_threaded` 来判断当前进程是否为单线程。然而,该函数没有考虑到 io_uring 的 `io_worker` 线程,这导致在某些情况下,time namespace 的 vvar 页面可能被错误地插入到进程的内存空间中。当该 time namespace 被销毁时,vvar 页面会被释放,但不会从进程的内存中移除,从而造成一个只读的 use-after-free 漏洞。 + - **漏洞效果**:攻击者可以通过此漏洞访问内核内存中的敏感信息,可能导致信息泄露。由于 time namespace 常用于容器环境中,这一漏洞可能会破坏容器的隔离性,使攻击者能够从容器内部获取宿主机或其他容器的内核内存内容。 + +总结:该 CVE 与 namespace 和隔离机制相关,是一个 Linux 内核 io_uring 子系统的漏洞,可能导致容器环境下的隔离失效和信息泄露。 + +cve: ./data/2023/25xxx/CVE-2023-25173.json +containerd is an open source container runtime. 
A bug was found in containerd prior to versions 1.6.18 and 1.5.18 where supplementary groups are not set up properly inside a container. If an attacker has direct access to a container and manipulates their supplementary group access, they may be able to use supplementary group access to bypass primary group restrictions in some cases, potentially gaining access to sensitive information or gaining the ability to execute code in that container. Downstream applications that use the containerd client library may be affected as well. + +This bug has been fixed in containerd v1.6.18 and v.1.5.18. Users should update to these versions and recreate containers to resolve this issue. Users who rely on a downstream application that uses containerd's client library should check that application for a separate advisory and instructions. As a workaround, ensure that the `"USER $USERNAME"` Dockerfile instruction is not used. Instead, set the container entrypoint to a value similar to `ENTRYPOINT ["su", "-", "user"]` to allow `su` to properly set up supplementary groups. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,这个 CVE 与容器(container)相关,具体涉及容器内的用户和组权限配置问题。 + +2. **程序漏洞分析**: + - **程序**:这是 containerd 的漏洞。containerd 是一个开源的容器运行时。 + - **漏洞发生原因**:在 containerd 版本 1.6.18 和 1.5.18 之前,容器内的补充组(supplementary groups)没有被正确设置。 + - **效果**:如果攻击者能够直接访问容器并操纵其补充组权限,他们可能利用这些权限绕过主组(primary group)的限制。这可能导致以下后果: + - 访问敏感信息。 + - 在某些情况下,获得执行代码的能力。 + - **影响范围**:不仅直接影响 containerd,还可能影响使用 containerd 客户端库的下游应用程序。 + +总结:这是一个与容器相关的漏洞,涉及 containerd 容器运行时中补充组配置不当的问题,可能导致权限提升或敏感信息泄露。 + +cve: ./data/2023/25xxx/CVE-2023-25809.json +runc is a CLI tool for spawning and running containers according to the OCI specification. In affected versions it was found that rootless runc makes `/sys/fs/cgroup` writable in following conditons: 1. 
when runc is executed inside the user namespace, and the `config.json` does not specify the cgroup namespace to be unshared (e.g.., `(docker|podman|nerdctl) run --cgroupns=host`, with Rootless Docker/Podman/nerdctl) or 2. when runc is executed outside the user namespace, and `/sys` is mounted with `rbind, ro` (e.g., `runc spec --rootless`; this condition is very rare). A container may gain the write access to user-owned cgroup hierarchy `/sys/fs/cgroup/user.slice/...` on the host . Other users's cgroup hierarchies are not affected. Users are advised to upgrade to version 1.1.5. Users unable to upgrade may unshare the cgroup namespace (`(docker|podman|nerdctl) run --cgroupns=private)`. This is the default behavior of Docker/Podman/nerdctl on cgroup v2 hosts. or add `/sys/fs/cgroup` to `maskedPaths`. + +analysis: ### 分析结果: + +1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 信息与 namespace、cgroup、container 和隔离密切相关。 + +2. **漏洞所属程序及影响分析:** + - **程序名称:** runc(一个根据 OCI 规范启动和运行容器的 CLI 工具)。 + - **漏洞发生原因:** + 在受影响版本中,当 runc 以 rootless 模式运行时,在以下两种情况下会使 `/sys/fs/cgroup` 可写: + 1. 当 runc 在用户命名空间内执行,并且 `config.json` 文件未指定不共享 cgroup 命名空间(例如使用 `docker run --cgroupns=host` 或类似命令时)。 + 2. 当 runc 在用户命名空间外执行,并且 `/sys` 被挂载为只读绑定(例如通过 `runc spec --rootless`)。 + - **漏洞效果:** + 容器可能会获得对主机上用户拥有的 cgroup 层级(如 `/sys/fs/cgroup/user.slice/...`)的写访问权限。这可能导致容器突破隔离,修改宿主机上的 cgroup 配置,从而可能影响宿主机资源分配或引发其他安全问题。不过,其他用户的 cgroup 层级不受影响。 + - **缓解措施:** + - 升级到 runc 1.1.5 版本。 + - 使用 `--cgroupns=private` 参数以确保 cgroup 命名空间不被共享。 + - 将 `/sys/fs/cgroup` 添加到 `maskedPaths` 中。 + +总结:这是一个与容器隔离相关的漏洞,涉及 runc 的 cgroup 和命名空间处理逻辑,可能导致容器突破部分隔离机制。 + +cve: ./data/2023/26xxx/CVE-2023-26490.json +mailcow is a dockerized email package, with multiple containers linked in one bridged network. The Sync Job feature - which can be made available to standard users by assigning them the necessary permission - suffers from a shell command injection. 
A malicious user can abuse this vulnerability to obtain shell access to the Docker container running dovecot. The imapsync Perl script implements all the necessary functionality for this feature, including the XOAUTH2 authentication mechanism. This code path creates a shell command to call openssl. However, since different parts of the specified user password are included without any validation, one can simply execute additional shell commands. Notably, the default ACL for a newly-created mailcow account does not include the necessary permission. The Issue has been fixed within the 2023-03 Update (March 3rd 2023). As a temporary workaround the Syncjob ACL can be removed from all mailbox users, preventing from creating or changing existing Syncjobs. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与容器相关。漏洞发生在 mailcow 的 Docker 容器环境中,具体是与 Dovecot 容器相关的 shell 命令注入问题。 + +2. **程序漏洞分析:** + - **程序类型:** 容器内部运行的应用(Dovecot 容器中的 imapsync Perl 脚本)。 + - **漏洞发生原因:** 在实现 Sync Job 功能时,imapsync Perl 脚本中存在命令注入漏洞。当用户密码的不同部分被直接嵌入到调用 openssl 的 shell 命令中时,未进行任何验证或转义,导致攻击者可以注入额外的 shell 命令。 + - **漏洞效果:** 攻击者可以通过此漏洞获得对运行 Dovecot 的 Docker 容器的 shell 访问权限。虽然漏洞本身限于容器内,但由于容器共享同一个网络(bridged network),可能进一步威胁其他关联容器的安全性。 + +总结:这是一个容器内部应用(Dovecot 容器中的 imapsync 脚本)的命令注入漏洞,可能导致容器级别的权限提升和横向移动风险。 + +cve: ./data/2023/27xxx/CVE-2023-27290.json +Docker based datastores for IBM Instana (IBM Observability with Instana 239-0 through 239-2, 241-0 through 241-2, and 243-0) do not currently require authentication. Due to this, an attacker within the network could access the datastores with read/write access. IBM X-Force ID: 248737. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 
这是Docker数据存储的漏洞,涉及IBM Instana(现为IBM Observability with Instana)的特定版本(239-0至239-2、241-0至241-2以及243-0)。该漏洞的发生是因为Docker数据存储未启用身份验证机制,导致网络中的攻击者可以未经授权访问这些数据存储,并获得读/写权限。此漏洞的效果是允许攻击者在无需身份验证的情况下访问和修改敏感数据,从而可能泄露或破坏数据完整性。 + +cve: ./data/2023/27xxx/CVE-2023-27595.json +Cilium is a networking, observability, and security solution with an eBPF-based dataplane. In version 1.13.0, when Cilium is started, there is a short period when Cilium eBPF programs are not attached to the host. During this period, the host does not implement any of Cilium's featureset. This can cause disruption to newly established connections during this period due to the lack of Load Balancing, or can cause Network Policy bypass due to the lack of Network Policy enforcement during the window. This vulnerability impacts any Cilium-managed endpoints on the node (such as Kubernetes Pods), as well as the host network namespace (including Host Firewall). This vulnerability is fixed in Cilium 1.13.1 or later. Cilium releases 1.12.x, 1.11.x, and earlier are not affected. There are no known workarounds. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 namespace 和容器相关。Cilium 是一个用于容器网络和安全的解决方案,它管理 Kubernetes Pods 等容器化工作负载,并且涉及主机网络命名空间(host network namespace)。 + +2. **这是什么程序的漏洞,如何发生,有何效果**: + - **程序**:这是一个 Cilium 的漏洞。Cilium 是一个基于 eBPF 的容器网络和安全解决方案。 + - **漏洞发生原因**:在 Cilium 启动时,存在一个短暂的时间窗口,在此期间 Cilium 的 eBPF 程序尚未附加到主机上。因此,主机在此期间无法实施 Cilium 提供的功能集。 + - **漏洞效果**: + - 新建立的连接可能会因缺少负载均衡功能而中断。 + - 由于缺乏网络策略(Network Policy)的强制执行,可能导致网络策略被绕过。 + - 该问题影响节点上的所有 Cilium 管理的端点(例如 Kubernetes Pods)以及主机网络命名空间(包括主机防火墙)。 + +cve: ./data/2023/28xxx/CVE-2023-28840.json +Moby is an open source container framework developed by Docker Inc. that is distributed as Docker, Mirantis Container Runtime, and various other downstream projects/products. The Moby daemon component (`dockerd`), which is developed as moby/moby, is commonly referred to as *Docker*. 
+ +Swarm Mode, which is compiled in and delivered by default in dockerd and is thus present in most major Moby downstreams, is a simple, built-in container orchestrator that is implemented through a combination of SwarmKit and supporting network code. + +The overlay network driver is a core feature of Swarm Mode, providing isolated virtual LANs that allow communication between containers and services across the cluster. This driver is an implementation/user of VXLAN, which encapsulates link-layer (Ethernet) frames in UDP datagrams that tag the frame with a VXLAN Network ID (VNI) that identifies the originating overlay network. In addition, the overlay network driver supports an optional, off-by-default encrypted mode, which is especially useful when VXLAN packets traverses an untrusted network between nodes. + +Encrypted overlay networks function by encapsulating the VXLAN datagrams through the use of the IPsec Encapsulating Security Payload protocol in Transport mode. By deploying IPSec encapsulation, encrypted overlay networks gain the additional properties of source authentication through cryptographic proof, data integrity through check-summing, and confidentiality through encryption. + +When setting an endpoint up on an encrypted overlay network, Moby installs three iptables (Linux kernel firewall) rules that enforce both incoming and outgoing IPSec. These rules rely on the u32 iptables extension provided by the xt_u32 kernel module to directly filter on a VXLAN packet's VNI field, so that IPSec guarantees can be enforced on encrypted overlay networks without interfering with other overlay networks or other users of VXLAN. + +Two iptables rules serve to filter incoming VXLAN datagrams with a VNI that corresponds to an encrypted network and discards unencrypted datagrams. The rules are appended to the end of the INPUT filter chain, following any rules that have been previously set by the system administrator. 
Administrator-set rules take precedence over the rules Moby sets to discard unencrypted VXLAN datagrams, which can potentially admit unencrypted datagrams that should have been discarded. + +The injection of arbitrary Ethernet frames can enable a Denial of Service attack. A sophisticated attacker may be able to establish a UDP or TCP connection by way of the container’s outbound gateway that would otherwise be blocked by a stateful firewall, or carry out other escalations beyond simple injection by smuggling packets into the overlay network. + +Patches are available in Moby releases 23.0.3 and 20.10.24. As Mirantis Container Runtime's 20.10 releases are numbered differently, users of that platform should update to 20.10.16. + +Some workarounds are available. Close the VXLAN port (by default, UDP port 4789) to incoming traffic at the Internet boundary to prevent all VXLAN packet injection, and/or ensure that the `xt_u32` kernel module is available on all nodes of the Swarm cluster. + +analysis: ### 分析结果: + +1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与容器和隔离相关。具体涉及 Docker 的 Swarm Mode 和 overlay 网络驱动,这是容器编排和网络隔离的核心功能之一。 + +2. **漏洞所属程序及影响分析:** + - **程序:** 这是 Docker(Moby 项目)中的漏洞,具体发生在 `dockerd` 的 Swarm Mode 组件中。 + - **漏洞发生原因:** 在设置加密的 overlay 网络时,Docker 使用 iptables 规则来过滤 VXLAN 数据包的 VNI 字段,以确保只有加密的数据包可以通过。然而,这些规则被追加到 INPUT 链的末尾,可能导致系统管理员预先设置的规则优先于 Docker 的规则,从而允许未加密的 VXLAN 数据包通过。 + - **漏洞效果:** + - 攻击者可能通过注入任意 Ethernet 帧发起拒绝服务攻击(DoS)。 + - 更严重的后果是,攻击者可能绕过状态防火墙,建立本应被阻止的 UDP 或 TCP 连接。 + - 可能进一步导致其他攻击升级,例如通过数据包走私进入 overlay 网络。 + +3. **总结:** + 该 CVE 直接影响 Docker 的容器网络隔离机制,尤其是 Swarm Mode 中的 overlay 网络驱动。它可能导致未加密的 VXLAN 数据包绕过安全检查,破坏容器之间的网络隔离,进而引发 DoS 或其他更严重的攻击。 + +cve: ./data/2023/28xxx/CVE-2023-28841.json +Moby is an open source container framework developed by Docker Inc. that is distributed as Docker, Mirantis Container Runtime, and various other downstream projects/products. 
The Moby daemon component (`dockerd`), which is developed as moby/moby is commonly referred to as *Docker*. + +Swarm Mode, which is compiled in and delivered by default in `dockerd` and is thus present in most major Moby downstreams, is a simple, built-in container orchestrator that is implemented through a combination of SwarmKit and supporting network code. + +The `overlay` network driver is a core feature of Swarm Mode, providing isolated virtual LANs that allow communication between containers and services across the cluster. This driver is an implementation/user of VXLAN, which encapsulates link-layer (Ethernet) frames in UDP datagrams that tag the frame with the VXLAN metadata, including a VXLAN Network ID (VNI) that identifies the originating overlay network. In addition, the overlay network driver supports an optional, off-by-default encrypted mode, which is especially useful when VXLAN packets traverses an untrusted network between nodes. + +Encrypted overlay networks function by encapsulating the VXLAN datagrams through the use of the IPsec Encapsulating Security Payload protocol in Transport mode. By deploying IPSec encapsulation, encrypted overlay networks gain the additional properties of source authentication through cryptographic proof, data integrity through check-summing, and confidentiality through encryption. + +When setting an endpoint up on an encrypted overlay network, Moby installs three iptables (Linux kernel firewall) rules that enforce both incoming and outgoing IPSec. These rules rely on the `u32` iptables extension provided by the `xt_u32` kernel module to directly filter on a VXLAN packet's VNI field, so that IPSec guarantees can be enforced on encrypted overlay networks without interfering with other overlay networks or other users of VXLAN. + +An iptables rule designates outgoing VXLAN datagrams with a VNI that corresponds to an encrypted overlay network for IPsec encapsulation. 
+ +Encrypted overlay networks on affected platforms silently transmit unencrypted data. As a result, `overlay` networks may appear to be functional, passing traffic as expected, but without any of the expected confidentiality or data integrity guarantees. + +It is possible for an attacker sitting in a trusted position on the network to read all of the application traffic that is moving across the overlay network, resulting in unexpected secrets or user data disclosure. Thus, because many database protocols, internal APIs, etc. are not protected by a second layer of encryption, a user may use Swarm encrypted overlay networks to provide confidentiality, which due to this vulnerability this is no longer guaranteed. + +Patches are available in Moby releases 23.0.3, and 20.10.24. As Mirantis Container Runtime's 20.10 releases are numbered differently, users of that platform should update to 20.10.16. + +Some workarounds are available. Close the VXLAN port (by default, UDP port 4789) to outgoing traffic at the Internet boundary in order to prevent unintentionally leaking unencrypted traffic over the Internet, and/or ensure that the `xt_u32` kernel module is available on all nodes of the Swarm cluster. + +analysis: ### 分析结果: + +1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与容器和隔离技术密切相关。它涉及 Docker/Moby 的 Swarm Mode 和 overlay 网络驱动,这些功能依赖于 VXLAN 和 IPsec 来实现跨主机容器网络的隔离和加密。 + +2. **这是什么程序的漏洞:** + - 漏洞发生在 **Docker/Moby** 的实现中,具体是 `dockerd`(Moby 的守护进程)在处理 Swarm Mode 的加密 overlay 网络时出现的问题。 + - **漏洞发生原因:** 在设置加密 overlay 网络时,Moby 使用 iptables 的 `u32` 扩展来过滤 VXLAN 数据包的 VNI 字段,以确保只有指定的 overlay 网络流量被 IPsec 加密。然而,如果系统中缺少 `xt_u32` 内核模块,iptables 规则无法正确执行,导致本应加密的流量未被加密就直接传输。 + - **漏洞效果:** 加密 overlay 网络上的数据实际上以明文形式传输,破坏了数据的机密性和完整性。攻击者可以通过网络嗅探获取敏感数据,包括数据库通信、内部 API 调用等。 + +3. 
**总结:** + - 该漏洞与容器隔离技术中的网络隔离部分相关。 + - 它影响 Docker/Moby 的 Swarm Mode 功能,特别是在使用加密 overlay 网络时。 + - 漏洞可能导致敏感数据泄露,破坏加密 overlay 网络的安全性保证。 + +cve: ./data/2023/28xxx/CVE-2023-28842.json +Moby) is an open source container framework developed by Docker Inc. that is distributed as Docker, Mirantis Container Runtime, and various other downstream projects/products. The Moby daemon component (`dockerd`), which is developed as moby/moby is commonly referred to as *Docker*. + +Swarm Mode, which is compiled in and delivered by default in `dockerd` and is thus present in most major Moby downstreams, is a simple, built-in container orchestrator that is implemented through a combination of SwarmKit and supporting network code. + +The `overlay` network driver is a core feature of Swarm Mode, providing isolated virtual LANs that allow communication between containers and services across the cluster. This driver is an implementation/user of VXLAN, which encapsulates link-layer (Ethernet) frames in UDP datagrams that tag the frame with the VXLAN metadata, including a VXLAN Network ID (VNI) that identifies the originating overlay network. In addition, the overlay network driver supports an optional, off-by-default encrypted mode, which is especially useful when VXLAN packets traverses an untrusted network between nodes. + +Encrypted overlay networks function by encapsulating the VXLAN datagrams through the use of the IPsec Encapsulating Security Payload protocol in Transport mode. By deploying IPSec encapsulation, encrypted overlay networks gain the additional properties of source authentication through cryptographic proof, data integrity through check-summing, and confidentiality through encryption. + +When setting an endpoint up on an encrypted overlay network, Moby installs three iptables (Linux kernel firewall) rules that enforce both incoming and outgoing IPSec. 
These rules rely on the `u32` iptables extension provided by the `xt_u32` kernel module to directly filter on a VXLAN packet's VNI field, so that IPSec guarantees can be enforced on encrypted overlay networks without interfering with other overlay networks or other users of VXLAN. + +The `overlay` driver dynamically and lazily defines the kernel configuration for the VXLAN network on each node as containers are attached and detached. Routes and encryption parameters are only defined for destination nodes that participate in the network. The iptables rules that prevent encrypted overlay networks from accepting unencrypted packets are not created until a peer is available with which to communicate. + +Encrypted overlay networks silently accept cleartext VXLAN datagrams that are tagged with the VNI of an encrypted overlay network. As a result, it is possible to inject arbitrary Ethernet frames into the encrypted overlay network by encapsulating them in VXLAN datagrams. The implications of this can be quite dire, and GHSA-vwm3-crmr-xfxw should be referenced for a deeper exploration. + +Patches are available in Moby releases 23.0.3, and 20.10.24. As Mirantis Container Runtime's 20.10 releases are numbered differently, users of that platform should update to 20.10.16. + +Some workarounds are available. In multi-node clusters, deploy a global ‘pause’ container for each encrypted overlay network, on every node. For a single-node cluster, do not use overlay networks of any sort. Bridge networks provide the same connectivity on a single node and have no multi-node features. The Swarm ingress feature is implemented using an overlay network, but can be disabled by publishing ports in `host` mode instead of `ingress` mode (allowing the use of an external load balancer), and removing the `ingress` network. If encrypted overlay networks are in exclusive use, block UDP port 4789 from traffic that has not been validated by IPSec. + +analysis: ### 分析结果: + +1. 
**是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与容器和隔离相关。具体来说,它涉及 Docker/Moby 的 Swarm Mode 和 overlay 网络驱动程序,这些功能用于在容器集群中提供网络隔离和通信。 + +2. **漏洞所属程序及影响分析:** + - **程序类型:** 这是一个容器实现(Docker/Moby)的漏洞。 + - **漏洞发生原因:** 在使用加密 overlay 网络时,Moby 安装了 iptables 规则以防止未加密的 VXLAN 数据包进入加密的 overlay 网络。然而,这些规则仅在有可用对等节点时才会创建。在此之前,加密 overlay 网络会静默接受未加密的 VXLAN 数据包,导致潜在的安全风险。 + - **漏洞效果:** 攻击者可以通过注入未加密的 VXLAN 数据包来绕过加密保护,从而向加密的 overlay 网络中注入任意以太网帧。这可能破坏数据完整性、机密性和源认证,进而导致敏感信息泄露或网络通信被篡改。 + +总结:该 CVE 与容器隔离机制中的网络部分相关,主要影响 Docker/Moby 的 Swarm Mode 和 overlay 网络驱动程序。 + +cve: ./data/2023/28xxx/CVE-2023-28960.json +An Incorrect Permission Assignment for Critical Resource vulnerability in Juniper Networks Junos OS Evolved allows a local, authenticated low-privileged attacker to copy potentially malicious files into an existing Docker container on the local system. A follow-on administrator could then inadvertently start the Docker container leading to the malicious files being executed as root. This issue only affects systems with Docker configured and enabled, which is not enabled by default. Systems without Docker started are not vulnerable to this issue. This issue affects Juniper Networks Junos OS Evolved: 20.4 versions prior to 20.4R3-S5-EVO; 21.2 versions prior to 21.2R3-EVO; 21.3 versions prior to 21.3R3-EVO; 21.4 versions prior to 21.4R2-EVO. This issue does not affect Juniper Networks Junos OS Evolved versions prior to 19.2R1-EVO. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**:是的,该 CVE 与容器(Docker)相关,并且涉及文件系统权限管理和隔离问题。 + +2. 
**程序漏洞分析**: + - **这是什么程序的漏洞**:这是一个 Juniper Networks Junos OS Evolved 的漏洞。 + - **漏洞类型及发生原因**:由于 Junos OS Evolved 中存在资源权限分配错误,低权限的本地攻击者可以将潜在的恶意文件复制到已存在的 Docker 容器中。这表明在文件系统或容器目录的权限管理上存在问题,导致攻击者能够绕过正常的安全限制。 + - **漏洞效果**:如果管理员随后启动了被篡改的 Docker 容器,容器内的恶意文件将以 root 权限执行,从而可能导致容器内以及宿主机系统的完全控制权被攻击者获取。此问题仅影响启用了 Docker 的系统,并且 Docker 默认未启用时系统不受影响。 + - **影响范围**:漏洞影响特定版本的 Junos OS Evolved(如 20.4、21.2、21.3 和 21.4 等版本),并且需要 Docker 已配置并启用。 + +总结:该 CVE 是 Juniper Networks Junos OS Evolved 的一个容器相关漏洞,涉及权限管理不当,可能导致容器隔离失效和恶意代码以 root 权限执行。 + +cve: ./data/2023/29xxx/CVE-2023-29002.json +Cilium is a networking, observability, and security solution with an eBPF-based dataplane. When run in debug mode, Cilium will log the contents of the `cilium-secrets` namespace. This could include data such as TLS private keys for Ingress and GatewayAPI resources. An attacker with access to debug output from the Cilium containers could use the resulting output to intercept and modify traffic to and from the affected cluster. Output of the sensitive information would occur at Cilium agent restart, when secrets in the namespace are modified, and on creation of Ingress or GatewayAPI resources. This vulnerability is fixed in Cilium releases 1.11.16, 1.12.9, and 1.13.2. Users unable to upgrade should disable debug mode. + +analysis: 1. 这个CVE信息与namespace相关,因为它提到了`cilium-secrets` namespace的内容被泄露。 +2. 这是Cilium程序的漏洞,Cilium是一个基于eBPF的网络、可观测性和安全解决方案。漏洞发生在Cilium以调试模式运行时,会记录并泄露`cilium-secrets` namespace中的内容,可能包含TLS私钥等敏感信息。效果是攻击者可以通过获取调试输出,拦截和修改到受影响集群的流量。泄露会在Cilium代理重启、namespace中的秘密更新或创建Ingress/GatewayAPI资源时发生。 + +cve: ./data/2023/30xxx/CVE-2023-30549.json +Apptainer is an open source container platform for Linux. There is an ext4 use-after-free flaw that is exploitable through versions of Apptainer < 1.1.0 and installations that include apptainer-suid < 1.1.8 on older operating systems where that CVE has not been patched. 
That includes Red Hat Enterprise Linux 7, Debian 10 buster (unless the linux-5.10 package is installed), Ubuntu 18.04 bionic and Ubuntu 20.04 focal. Use-after-free flaws in the kernel can be used to attack the kernel for denial of service and potentially for privilege escalation. + +Apptainer 1.1.8 includes a patch that by default disables mounting of extfs filesystem types in setuid-root mode, while continuing to allow mounting of extfs filesystems in non-setuid "rootless" mode using fuse2fs. + +Some workarounds are possible. Either do not install apptainer-suid (for versions 1.1.0 through 1.1.7) or set `allow setuid = no` in apptainer.conf. This requires having unprivileged user namespaces enabled and except for apptainer 1.1.x versions will disallow mounting of sif files, extfs files, and squashfs files in addition to other, less significant impacts. (Encrypted sif files are also not supported unprivileged in apptainer 1.1.x.). Alternatively, use the `limit containers` options in apptainer.conf/singularity.conf to limit sif files to trusted users, groups, and/or paths, and set `allow container extfs = no` to disallow mounting of extfs overlay files. The latter option by itself does not disallow mounting of extfs overlay partitions inside SIF files, so that's why the former options are also needed. + +analysis: ### 分析结果: + +1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与容器和隔离相关。Apptainer(前身为 Singularity)是一个用于 Linux 的开源容器平台,旨在为科学计算和高性能计算提供安全和隔离的环境。该漏洞涉及容器内的文件系统挂载行为,以及如何通过 setuid-root 模式或 rootless 模式影响容器的隔离性。 + +2. **程序的漏洞信息:** + - **漏洞所属程序:** Apptainer(一个容器实现)。 + - **漏洞发生原因:** 在 Apptainer 的某些版本中,存在一个 ext4 文件系统的 use-after-free 漏洞。该漏洞可以通过 setuid-root 模式下的文件系统挂载操作触发。具体来说,当 Apptainer 允许在 setuid-root 模式下挂载 extfs 文件系统时,可能会导致内核中的 use-after-free 漏洞被利用。 + - **漏洞效果:** + - 攻击者可能利用此漏洞对内核发起拒绝服务攻击(DoS)。 + - 在更严重的情况下,攻击者可能通过此漏洞实现权限提升(Privilege Escalation),从而突破容器的隔离机制,影响宿主机的安全。 + +3. 
**总结:** + 该 CVE 描述了一个与 Apptainer 容器平台相关的漏洞,涉及文件系统挂载行为和内核的 use-after-free 漏洞。虽然问题的核心是内核层面的 use-after-free,但其触发条件与容器的配置(setuid-root 模式 vs. rootless 模式)密切相关,因此直接影响了容器的隔离性和安全性。 + +cve: ./data/2023/30xxx/CVE-2023-30840.json +Fluid is an open source Kubernetes-native distributed dataset orchestrator and accelerator for data-intensive applications. Starting in version 0.7.0 and prior to version 0.8.6, if a malicious user gains control of a Kubernetes node running fluid csi pod (controlled by the `csi-nodeplugin-fluid` node-daemonset), they can leverage the fluid-csi service account to modify specs of all the nodes in the cluster. However, since this service account lacks `list node` permissions, the attacker may need to use other techniques to identify vulnerable nodes. + +Once the attacker identifies and modifies the node specs, they can manipulate system-level-privileged components to access all secrets in the cluster or execute pods on other nodes. This allows them to elevate privileges beyond the compromised node and potentially gain full privileged access to the whole cluster. + +To exploit this vulnerability, the attacker can make all other nodes unschedulable (for example, patch node with taints) and wait for system-critical components with high privilege to appear on the compromised node. However, this attack requires two prerequisites: a compromised node and identifying all vulnerable nodes through other means. + +Version 0.8.6 contains a patch for this issue. As a workaround, delete the `csi-nodeplugin-fluid` daemonset in `fluid-system` namespace and avoid using CSI mode to mount FUSE file systems. Alternatively, using sidecar mode to mount FUSE file systems is recommended. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,该CVE与容器和Kubernetes集群的隔离性相关。具体来说,它涉及Kubernetes节点上的Fluid CSI Pod(由`csi-nodeplugin-fluid` DaemonSet控制),并且攻击者可以利用该Pod中的服务账户权限来修改节点规格,从而影响整个集群的安全性和隔离性。 + +2. 
**程序漏洞分析:** + - **程序:** 这是Fluid(一个开源的Kubernetes原生分布式数据集编排器和加速器)的漏洞。 + - **漏洞发生原因:** 在版本0.7.0及以上至0.8.6之前的版本中,`fluid-csi`服务账户具有过高的权限,可以修改集群中所有节点的规格。然而,由于该服务账户缺乏列出节点的权限,攻击者可能需要通过其他手段识别出易受攻击的节点。 + - **效果:** 攻击者可以通过修改节点规格来操控系统级特权组件,进而访问集群中的所有机密信息或在其他节点上执行Pod。这会导致攻击者从单个被攻陷的节点提升权限,最终可能获得对整个集群的完全控制权。此外,攻击者还可以通过设置污点(taints)使其他节点不可调度,诱使高特权的系统组件出现在被攻陷的节点上,进一步扩大攻击范围。 + +总结:这是一个与Kubernetes容器编排相关的漏洞,主要影响Fluid CSI组件的权限管理和节点隔离性。 + +cve: ./data/2023/31xxx/CVE-2023-31003.json +IBM Security Access Manager Container (IBM Security Verify Access Appliance 10.0.0.0 through 10.0.6.1 and IBM Security Verify Access Docker 10.0.6.1) could allow a local user to obtain root access due to improper access controls. IBM X-Force ID: 254658. + +analysis: 1. 该 CVE 信息与 namespace、cgroup、container 或者容器、隔离相关。 + +2. 这是 IBM Security Access Manager Container(基于容器的实现)的漏洞。 + - 漏洞发生的原因是由于容器内部的访问控制配置不当,导致本地用户可以利用此问题提升权限至 root。 + - 效果:攻击者可以通过此漏洞在容器内获得 root 权限,从而完全控制该容器。如果容器共享主机的某些资源或存在不完善的隔离机制,还可能进一步威胁到宿主机或其他容器的安全。 + +cve: ./data/2023/31xxx/CVE-2023-31004.json +IBM Security Access Manager Container (IBM Security Verify Access Appliance 10.0.0.0 through 10.0.6.1 and IBM Security Verify Access Docker 10.0.0.0 through 10.0.6.1) could allow a remote attacker to gain access to the underlying system using man in the middle techniques. IBM X-Force ID: 254765. + +analysis: 1. 该 CVE 信息与 namespace、cgroup、container 或者容器、隔离相关。 +2. 这是 IBM Security Access Manager Container(包括 IBM Security Verify Access Appliance 和 IBM Security Verify Access Docker)的漏洞。该漏洞发生在容器化的环境中,远程攻击者可以通过中间人(Man-in-the-Middle, MITM)技术突破容器的隔离,从而获取底层系统的访问权限。这种攻击可能允许攻击者绕过容器的安全边界,对宿主系统或其他容器造成威胁。 + +cve: ./data/2023/31xxx/CVE-2023-31248.json +Linux Kernel nftables Use-After-Free Local Privilege Escalation Vulnerability; `nft_chain_lookup_byid()` failed to check whether a chain was active and CAP_NET_ADMIN is in any user or network namespace + +analysis: 1. 
**是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与namespace相关,特别是用户命名空间(user namespace)和网络命名空间(network namespace)。 + +2. **漏洞分析**: + - **程序**:这是Linux Kernel的漏洞。 + - **漏洞发生原因**:`nft_chain_lookup_byid()` 函数在查找链时未能验证该链是否处于活动状态。触发该漏洞需要 `CAP_NET_ADMIN` 能力,而该能力可以在任意用户命名空间或网络命名空间中获得,因此非特权用户(例如容器内的进程)也可能满足利用条件。 + - **效果**:此漏洞可能导致 Use-After-Free 问题,从而引发本地特权提升(Local Privilege Escalation)。攻击者可以利用该漏洞从一个受限的用户命名空间或网络命名空间中逃逸,获取更高的权限,甚至完全控制主机系统。这直接影响了系统的隔离性,尤其是在容器环境中,可能会导致容器逃逸。 + +总结:该漏洞与namespace密切相关,涉及Linux Kernel中的nftables模块,可能导致容器逃逸或本地提权。 + +cve: ./data/2023/32xxx/CVE-2023-32191.json +When RKE provisions a cluster, it stores the cluster state in a configmap called `full-cluster-state` inside the `kube-system` namespace of the cluster itself. The information available in there allows non-admin users to escalate to admin. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,这个 CVE 与 namespace 和容器相关,因为它涉及到 Kubernetes 集群中的 `kube-system` namespace 以及存储在其中的配置信息。 + +2. **程序漏洞分析:** + - **程序:** RKE(Rancher Kubernetes Engine)。 + - **漏洞发生原因:** RKE 在创建集群时,将集群状态存储在一个名为 `full-cluster-state` 的 ConfigMap 中,该 ConfigMap 位于 Kubernetes 集群的 `kube-system` namespace 下。由于权限控制不当,非管理员用户可以访问此 ConfigMap 中的信息。 + - **效果:** 非管理员用户可以通过获取 `full-cluster-state` ConfigMap 中的内容,利用其中的敏感信息(例如凭据或其他授权数据),进而提升权限到管理员级别,从而对整个 Kubernetes 集群造成潜在的安全威胁。 + +总结:此漏洞与 Kubernetes 集群的 namespace 相关,并可能导致权限提升问题。 + +cve: ./data/2023/32xxx/CVE-2023-32194.json +A vulnerability has been identified when granting a create or * global role for a resource type of "namespaces"; no matter the API group, the subject will receive * + permissions for core namespaces. This can lead to someone being capable + of accessing, creating, updating, or deleting a namespace in the +project. + +analysis: 1. 这个CVE信息与namespace相关。 + +2. 
这是与Kubernetes或类似的容器编排平台相关的漏洞,而不是内核Kernel或容器实现Docker的漏洞。此漏洞发生在当为某个主体授予创建或全局角色时,如果资源类型为"namespaces",无论API组如何,该主体都会获得对核心命名空间的全部权限。这种情况下,攻击者可能能够访问、创建、更新或删除项目中的命名空间,从而破坏隔离性并可能导致未授权的操作。 + +cve: ./data/2023/32xxx/CVE-2023-32696.json +CKAN is an open-source data management system for powering data hubs and data portals. Prior to versions 2.9.9 and 2.10.1, the `ckan` user (equivalent to www-data) owned code and configuration files in the docker container and the `ckan` user had the permissions to use sudo. These issues allowed for code execution or privilege escalation if an arbitrary file write bug was available. Versions 2.9.9, 2.9.9-dev, 2.10.1, and 2.10.1-dev contain a patch. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是CKAN程序的漏洞,它是一个开源数据管理系统。漏洞发生在容器内的CKAN应用中,因为`ckan`用户拥有代码和配置文件的所有权,并且具有使用sudo的权限。如果存在任意文件写入漏洞,这可能导致代码执行或特权提升。效果是攻击者可能在容器内获得更高的权限并执行恶意代码。 + +cve: ./data/2023/34xxx/CVE-2023-34242.json +Cilium is a networking, observability, and security solution with an eBPF-based dataplane. Prior to version 1.13.4, when Gateway API is enabled in Cilium, the absence of a check on the namespace in which a ReferenceGrant is created could result in Cilium unintentionally gaining visibility of secrets (including certificates) and services across namespaces. An attacker on an affected cluster can leverage this issue to use cluster secrets that should not be visible to them, or communicate with services that they should not have access to. Gateway API functionality is disabled by default. This vulnerability is fixed in Cilium release 1.13.4. As a workaround, restrict the creation of `ReferenceGrant` resources to admin users by using Kubernetes RBAC. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与namespace相关。问题涉及Cilium在处理Gateway API时,未正确检查ReferenceGrant所创建的namespace,导致跨namespace的敏感信息(如secret和证书)被无意中访问。 + +2. 
**漏洞所属程序及影响分析**: + - **程序**:Cilium(一个基于eBPF的网络、可观测性和安全解决方案)。 + - **漏洞发生原因**:当Gateway API功能启用时,Cilium未对ReferenceGrant资源的创建进行正确的namespace检查,从而可能导致跨namespace的权限提升。 + - **效果**:攻击者可以利用此漏洞访问不应可见的集群secrets(如证书)或与不应访问的服务进行通信,破坏了namespace之间的隔离性。 + +总结:这是一个Cilium的漏洞,与namespace隔离机制相关,可能导致跨namespace的信息泄露和服务访问权限提升。 + +cve: ./data/2023/34xxx/CVE-2023-34844.json +Play With Docker < 0.0.2 has an insecure CAP_SYS_ADMIN privileged mode causing the docker container to escape. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是Play With Docker的漏洞(而非Docker本身的漏洞)。 + 漏洞发生的原因是Play With Docker在版本0.0.2之前,以不安全的CAP_SYS_ADMIN权限运行容器。这种特权模式允许容器内的进程获得对主机系统的广泛控制权,从而导致容器逃逸。 + 效果:攻击者可以利用此漏洞突破容器的隔离机制,访问宿主机系统及其上的其他资源,可能导致数据泄露、系统篡改或其他恶意行为。 + +cve: ./data/2023/35xxx/CVE-2023-35001.json +Linux Kernel nftables Out-Of-Bounds Read/Write Vulnerability; nft_byteorder poorly handled vm register contents when CAP_NET_ADMIN is in any user or network namespace + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 信息与 namespace 相关。描述中提到 "CAP_NET_ADMIN is in any user or network namespace",明确涉及网络命名空间(network namespace)和用户命名空间(user namespace),这些都是 Linux 容器隔离机制的重要组成部分。 + +2. **漏洞所属程序及影响分析**: + - **程序**:这是 Linux 内核(Kernel)中的漏洞,具体位于 nftables 的 `nft_byteorder` 组件。 + - **漏洞发生原因**:`nft_byteorder` 在处理虚拟机寄存器(vm register)内容时未能正确验证数据,导致可能发生越界读写(Out-Of-Bounds Read/Write)。 + - **效果**:由于此漏洞在任何用户或网络命名空间中拥有 `CAP_NET_ADMIN` 权限时均可触发,攻击者可能利用此漏洞破坏内核内存,进而导致系统崩溃(拒绝服务,DoS)或潜在的权限提升(例如从容器内部逃逸到宿主机)。这直接影响了基于命名空间的隔离机制的安全性,可能被用于容器逃逸攻击。 + +总结:该 CVE 与命名空间相关,是 Linux 内核 nftables 模块的漏洞,可能导致越界读写问题,影响容器隔离安全性,并可能被用于容器逃逸或其他恶意行为。 + +cve: ./data/2023/36xxx/CVE-2023-36815.json +Sealos is a Cloud Operating System designed for managing cloud-native applications. In version 4.2.0 and prior, there is a permission flaw in the Sealos billing system, which allows users to control the recharge resource account `sealos[.] io/v1/Payment`, resulting in the ability to recharge any amount of 1 renminbi (RMB). 
The charging interface may expose resource information. The namespace of this custom resource would be user's control and may have permission to correct it. It is not clear whether a fix exists. + +analysis: 1. 这个CVE信息与namespace相关,因为它提到自定义资源的namespace可以被用户控制。 + +2. 这是Sealos程序的漏洞,Sealos是一个用于管理云原生应用的云操作系统。该漏洞发生在Sealos计费系统中,由于权限缺陷,允许用户控制充值资源账户`sealos[.]io/v1/Payment`,从而可以充值任意金额(1元人民币)。此漏洞的效果是用户可能滥用计费系统,导致资金损失或资源信息泄露。 + +N/A + +cve: ./data/2023/37xxx/CVE-2023-37273.json +Auto-GPT is an experimental open-source application showcasing the capabilities of the GPT-4 language model. Running Auto-GPT version prior to 0.4.3 by cloning the git repo and executing `docker compose run auto-gpt` in the repo root uses a different docker-compose.yml file from the one suggested in the official docker set up instructions. The docker-compose.yml file located in the repo root mounts itself into the docker container without write protection. This means that if malicious custom python code is executed via the `execute_python_file` and `execute_python_code` commands, it can overwrite the docker-compose.yml file and abuse it to gain control of the host system the next time Auto-GPT is started. The issue has been patched in version 0.4.3. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,这个 CVE 与容器相关。问题出在 Docker 容器配置中,`docker-compose.yml` 文件被挂载到容器内且没有写保护,导致恶意代码可以通过覆盖该文件来破坏隔离性并影响主机系统。 + +2. **程序漏洞分析**: + - **这是什么程序的漏洞**:这是 Auto-GPT 应用程序的漏洞,具体是其 Docker 容器配置中的一个安全问题。 + - **漏洞如何发生**:Auto-GPT 在运行时使用了错误的 `docker-compose.yml` 文件(位于代码库根目录),此文件被挂载到容器内部,但未设置只读权限。如果攻击者能够通过 `execute_python_file` 或 `execute_python_code` 命令执行恶意 Python 代码,则可以修改挂载的 `docker-compose.yml` 文件。 + - **漏洞效果**:攻击者可以利用此漏洞覆盖 `docker-compose.yml` 文件,从而在下次启动 Auto-GPT 时滥用修改后的配置文件,进而可能获得对主机系统的控制权。这破坏了容器与主机之间的隔离机制。 + +总结:该 CVE 与容器隔离性直接相关,涉及容器配置不当导致的安全风险。 + +cve: ./data/2023/39xxx/CVE-2023-39347.json +Cilium is a networking, observability, and security solution with an eBPF-based dataplane. 
An attacker with the ability to update pod labels can cause Cilium to apply incorrect network policies. This issue arises due to the fact that on pod update, Cilium incorrectly uses user-provided pod labels to select the policies which apply to the workload in question. This can affect Cilium network policies that use the namespace, service account or cluster constructs to restrict traffic, Cilium clusterwide network policies that use Cilium namespace labels to select the Pod and Kubernetes network policies. Non-existent construct names can be provided, which bypass all network policies applicable to the construct. For example, providing a pod with a non-existent namespace as the value of the `io.kubernetes.pod.namespace` label results in none of the namespaced CiliumNetworkPolicies applying to the pod in question. This attack requires the attacker to have Kubernetes API Server access, as described in the Cilium Threat Model. This issue has been resolved in: Cilium versions 1.14.2, 1.13.7, and 1.12.14. Users are advised to upgrade. As a workaround an admission webhook can be used to prevent pod label updates to the `k8s:io.kubernetes.pod.namespace` and `io.cilium.k8s.policy.*` keys. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器隔离相关:** + 是的,该 CVE 与 namespace 和容器隔离相关。问题的核心在于 Cilium 在处理 Pod 更新时,错误地使用了用户提供的 Pod 标签(labels),这些标签可能涉及 namespace 或其他与隔离相关的构造(constructs),从而导致网络策略被绕过。 + +2. 
**漏洞分析:** + - **程序名称:** Cilium(一个基于 eBPF 的容器网络、可观测性和安全解决方案)。 + - **漏洞发生原因:** 当 Pod 被更新时,Cilium 错误地依赖用户提供的 Pod 标签来选择适用于该工作负载的网络策略。特别是,攻击者可以通过提供不存在的 namespace 名称或其他标签值,绕过所有与该 namespace 相关的 Cilium 网络策略。 + - **效果:** 攻击者可以利用此漏洞使特定 Pod 不受任何网络策略的限制,从而破坏容器之间的网络隔离。例如,通过将 `io.kubernetes.pod.namespace` 标签设置为一个不存在的 namespace,可以使该 Pod 不受任何 namespaced CiliumNetworkPolicies 的约束。 + - **影响:** 此漏洞可能导致容器间的网络隔离失效,攻击者可能滥用此漏洞访问未授权的资源或服务。 + +总结:CVE-2023-39347 与 namespace 和容器隔离密切相关,是 Cilium 中的一个漏洞,允许攻击者通过篡改 Pod 标签绕过网络策略,破坏容器间的网络隔离。 + +cve: ./data/2023/3xxx/CVE-2023-3027.json +The grc-policy-propagator allows security escalation within the cluster. The propagator allows policies which contain some dynamically obtained values (instead of the policy apply a static manifest on a managed cluster) of taking advantage of cluster scoped access in a created policy. This feature does not restrict properly to lookup content from the namespace where the policy was created. + +analysis: 1. 这个CVE信息与namespace相关,因为它提到策略创建时未能正确限制从创建策略的namespace中查找内容。 +2. 这是容器编排或管理平台中的漏洞,具体来说是grc-policy-propagator程序的漏洞。这个漏洞发生在grc-policy-propagator处理动态策略时,由于未正确限制对namespace内容的访问,攻击者可能利用此漏洞在集群内进行安全权限提升。效果是可能导致集群内其他资源被未经授权访问或控制。 +3. 该问题并非内核(Kernel)或容器实现(如Docker)的漏洞,而是容器管理组件的逻辑漏洞。 + +cve: ./data/2023/40xxx/CVE-2023-40453.json +Docker Machine through 0.16.2 allows an attacker, who has control of a worker node, to provide crafted version data, which might potentially trick an administrator into performing an unsafe action (via escape sequence injection), or might have a data size that causes a denial of service to a bastion node. NOTE: This vulnerability only affects products that are no longer supported by the maintainer. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 
这是Docker Machine程序的漏洞。漏洞发生在当攻击者控制了一个工作节点(worker node)时,可以通过提供伪造的版本数据,利用转义序列注入欺骗管理员执行不安全的操作,或者通过提供过大的数据大小导致 bastion 节点的拒绝服务(Denial of Service, DoS)。此漏洞影响的是容器编排和管理层面的安全性,可能导致管理员操作被操控或关键节点服务中断。 + +cve: ./data/2023/41xxx/CVE-2023-41333.json +Cilium is a networking, observability, and security solution with an eBPF-based dataplane. An attacker with the ability to create or modify CiliumNetworkPolicy objects in a particular namespace is able to affect traffic on an entire Cilium cluster, potentially bypassing policy enforcement in other namespaces. By using a crafted `endpointSelector` that uses the `DoesNotExist` operator on the `reserved:init` label, the attacker can create policies that bypass namespace restrictions and affect the entire Cilium cluster. This includes potentially allowing or denying all traffic. This attack requires API server access, as described in the Kubernetes API Server Attacker section of the Cilium Threat Model. This issue has been resolved in Cilium versions 1.14.2, 1.13.7, and 1.12.14. As a workaround an admission webhook can be used to prevent the use of `endpointSelectors` that use the `DoesNotExist` operator on the `reserved:init` label in CiliumNetworkPolicies. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与namespace和容器隔离密切相关。CiliumNetworkPolicy对象的作用范围是特定的namespace,但通过构造特定的`endpointSelector`(使用`DoesNotExist`操作符匹配`reserved:init`标签),攻击者可以绕过namespace限制,影响整个Cilium集群的网络策略执行。这直接影响了namespace之间的隔离性。 + +2. **程序漏洞分析**: + - **程序**:这是Cilium的漏洞,而非内核Kernel或容器实现(如Docker)。Cilium是一个基于eBPF的容器网络解决方案,用于提供网络连接、可观测性和安全性。 + - **漏洞发生机制**:攻击者需要具备在某个namespace中创建或修改CiliumNetworkPolicy对象的能力。通过构造一个特殊的`endpointSelector`(使用`DoesNotExist`操作符匹配`reserved:init`标签),攻击者能够创建影响整个Cilium集群的策略,而不是局限于特定namespace。 + - **效果**:此漏洞允许攻击者绕过namespace隔离,从而可能对整个集群的网络流量进行未经授权的控制,包括允许或阻止所有流量。这严重破坏了多租户环境下的隔离性,可能导致数据泄露或服务中断。 + +cve: ./data/2023/42xxx/CVE-2023-42125.json +Avast Premium Security Sandbox Protection Link Following Privilege Escalation Vulnerability. 
This vulnerability allows local attackers to escalate privileges on affected installations of Avast Premium Security. An attacker must first obtain the ability to execute low-privileged code on the target system in order to exploit this vulnerability. + +The specific flaw exists within the implementation of the sandbox feature. By creating a symbolic link, an attacker can abuse the service to create arbitrary namespace objects. An attacker can leverage this vulnerability to escalate privileges and execute arbitrary code in the context of SYSTEM. +. Was ZDI-CAN-20383. + +analysis: 1. 这个CVE信息与namespace相关,因为它提到了通过创建符号链接来生成任意namespace对象。 +2. 这是Avast Premium Security程序的漏洞。该漏洞发生在沙箱功能的实现中,由于对符号链接处理不当,攻击者可以利用此漏洞创建任意namespace对象,从而提升权限并以SYSTEM身份执行任意代码。这并非内核Kernel或容器实现Docker的漏洞,而是与沙箱隔离机制相关的应用程序漏洞。 + +cve: ./data/2023/52xxx/CVE-2023-52443.json +In the Linux kernel, the following vulnerability has been resolved: + +apparmor: avoid crash when parsed profile name is empty + +When processing a packed profile in unpack_profile() described like + + "profile :ns::samba-dcerpcd /usr/lib*/samba/{,samba/}samba-dcerpcd {...}" + +a string ":samba-dcerpcd" is unpacked as a fully-qualified name and then +passed to aa_splitn_fqname(). + +aa_splitn_fqname() treats ":samba-dcerpcd" as only containing a namespace. +Thus it returns NULL for tmpname, meanwhile tmpns is non-NULL. Later +aa_alloc_profile() crashes as the new profile name is NULL now. + +general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN NOPTI +KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] +CPU: 6 PID: 1657 Comm: apparmor_parser Not tainted 6.7.0-rc2-dirty #16 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 +RIP: 0010:strlen+0x1e/0xa0 +Call Trace: + + ? 
strlen+0x1e/0xa0 + aa_policy_init+0x1bb/0x230 + aa_alloc_profile+0xb1/0x480 + unpack_profile+0x3bc/0x4960 + aa_unpack+0x309/0x15e0 + aa_replace_profiles+0x213/0x33c0 + policy_update+0x261/0x370 + profile_replace+0x20e/0x2a0 + vfs_write+0x2af/0xe00 + ksys_write+0x126/0x250 + do_syscall_64+0x46/0xf0 + entry_SYSCALL_64_after_hwframe+0x6e/0x76 + +---[ end trace 0000000000000000 ]--- +RIP: 0010:strlen+0x1e/0xa0 + +It seems such behaviour of aa_splitn_fqname() is expected and checked in +other places where it is called (e.g. aa_remove_profiles). Well, there +is an explicit comment "a ns name without a following profile is allowed" +inside. + +AFAICS, nothing can prevent unpacked "name" to be in form like +":samba-dcerpcd" - it is passed from userspace. + +Deny the whole profile set replacement in such case and inform user with +EPROTO and an explaining message. + +Found by Linux Verification Center (linuxtesting.org). + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个CVE与namespace相关。问题出现在AppArmor对带有命名空间(namespace)的配置文件解析过程中,特别是当解析到一个空的或不完整的profile名称时,导致内核崩溃。 + +2. 
**这是什么程序的漏洞**: + - **程序**:Linux内核(Kernel)。 + - **漏洞发生原因**:在处理AppArmor的打包配置文件时,`unpack_profile()`函数会解析profile名称。如果profile名称仅包含命名空间(如`:samba-dcerpcd`),而不包含实际的profile部分,则`aa_splitn_fqname()`函数会将此视为仅有一个命名空间,并返回`NULL`作为profile名称。随后,在调用`aa_alloc_profile()`时,由于传入的profile名称为`NULL`,导致内核崩溃(general protection fault)。 + - **效果**:该漏洞可能导致系统崩溃(kernel crash),从而影响系统的可用性和稳定性。攻击者可以通过构造恶意的AppArmor配置文件触发此漏洞,进而导致拒绝服务(DoS)攻击。 + +总结:这是一个与namespace相关的Linux内核漏洞,涉及AppArmor模块的profile解析过程,可能导致系统崩溃。 + +cve: ./data/2023/52xxx/CVE-2023-52707.json +In the Linux kernel, the following vulnerability has been resolved: + +sched/psi: Fix use-after-free in ep_remove_wait_queue() + +If a non-root cgroup gets removed when there is a thread that registered +trigger and is polling on a pressure file within the cgroup, the polling +waitqueue gets freed in the following path: + + do_rmdir + cgroup_rmdir + kernfs_drain_open_files + cgroup_file_release + cgroup_pressure_release + psi_trigger_destroy + +However, the polling thread still has a reference to the pressure file and +will access the freed waitqueue when the file is closed or upon exit: + + fput + ep_eventpoll_release + ep_free + ep_remove_wait_queue + remove_wait_queue + +This results in use-after-free as pasted below. + +The fundamental problem here is that cgroup_file_release() (and +consequently waitqueue's lifetime) is not tied to the file's real lifetime. +Using wake_up_pollfree() here might be less than ideal, but it is in line +with the comment at commit 42288cb44c4b ("wait: add wake_up_pollfree()") +since the waitqueue's lifetime is not tied to file's one and can be +considered as another special case. While this would be fixable by somehow +making cgroup_file_release() be tied to the fput(), it would require +sizable refactoring at cgroups or higher layer which might be more +justifiable if we identify more cases like this. 
+ + BUG: KASAN: use-after-free in _raw_spin_lock_irqsave+0x60/0xc0 + Write of size 4 at addr ffff88810e625328 by task a.out/4404 + + CPU: 19 PID: 4404 Comm: a.out Not tainted 6.2.0-rc6 #38 + Hardware name: Amazon EC2 c5a.8xlarge/, BIOS 1.0 10/16/2017 + Call Trace: + + dump_stack_lvl+0x73/0xa0 + print_report+0x16c/0x4e0 + kasan_report+0xc3/0xf0 + kasan_check_range+0x2d2/0x310 + _raw_spin_lock_irqsave+0x60/0xc0 + remove_wait_queue+0x1a/0xa0 + ep_free+0x12c/0x170 + ep_eventpoll_release+0x26/0x30 + __fput+0x202/0x400 + task_work_run+0x11d/0x170 + do_exit+0x495/0x1130 + do_group_exit+0x100/0x100 + get_signal+0xd67/0xde0 + arch_do_signal_or_restart+0x2a/0x2b0 + exit_to_user_mode_prepare+0x94/0x100 + syscall_exit_to_user_mode+0x20/0x40 + do_syscall_64+0x52/0x90 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + + + Allocated by task 4404: + + kasan_set_track+0x3d/0x60 + __kasan_kmalloc+0x85/0x90 + psi_trigger_create+0x113/0x3e0 + pressure_write+0x146/0x2e0 + cgroup_file_write+0x11c/0x250 + kernfs_fop_write_iter+0x186/0x220 + vfs_write+0x3d8/0x5c0 + ksys_write+0x90/0x110 + do_syscall_64+0x43/0x90 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + + Freed by task 4407: + + kasan_set_track+0x3d/0x60 + kasan_save_free_info+0x27/0x40 + ____kasan_slab_free+0x11d/0x170 + slab_free_freelist_hook+0x87/0x150 + __kmem_cache_free+0xcb/0x180 + psi_trigger_destroy+0x2e8/0x310 + cgroup_file_release+0x4f/0xb0 + kernfs_drain_open_files+0x165/0x1f0 + kernfs_drain+0x162/0x1a0 + __kernfs_remove+0x1fb/0x310 + kernfs_remove_by_name_ns+0x95/0xe0 + cgroup_addrm_files+0x67f/0x700 + cgroup_destroy_locked+0x283/0x3c0 + cgroup_rmdir+0x29/0x100 + kernfs_iop_rmdir+0xd1/0x140 + vfs_rmdir+0xfe/0x240 + do_rmdir+0x13d/0x280 + __x64_sys_rmdir+0x2c/0x30 + do_syscall_64+0x43/0x90 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,该漏洞与cgroup(控制组)相关。 + +2. 
**程序的漏洞分析**: + - **程序**:这是Linux内核(Kernel)中的一个漏洞。 + - **漏洞发生位置**:漏洞发生在调度子系统(sched)中的psi(Pressure Stall Information)模块。具体是在`ep_remove_wait_queue()`函数中存在use-after-free问题。 + - **漏洞发生原因**:当一个非root cgroup被移除时,如果有一个线程在该cgroup的压力文件上注册了触发器并正在进行轮询操作,那么在cgroup移除的过程中,相关的轮询等待队列(waitqueue)会被释放。然而,执行轮询的线程仍然持有对压力文件的引用,并在文件关闭或线程退出时尝试访问已经被释放的等待队列,从而导致use-after-free问题。 + - **效果**:此漏洞可能导致内核崩溃(kernel panic)或不稳定行为,因为use-after-free问题会破坏内存完整性,可能被恶意利用来执行任意代码或导致系统崩溃。 + +总结:这是一个Linux内核中的漏洞,与cgroup的生命周期管理相关,可能导致严重的内核安全问题和稳定性问题。 + +cve: ./data/2023/52xxx/CVE-2023-52880.json +In the Linux kernel, the following vulnerability has been resolved: + +tty: n_gsm: require CAP_NET_ADMIN to attach N_GSM0710 ldisc + +Any unprivileged user can attach N_GSM0710 ldisc, but it requires +CAP_NET_ADMIN to create a GSM network anyway. + +Require initial namespace CAP_NET_ADMIN to do that. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,这个 CVE 与 namespace 相关。具体来说,它涉及 Linux 内核中的命名空间(namespace)权限控制问题,特别是网络命名空间(network namespace)中的能力(capability)检查。 + +2. **程序漏洞信息**: + - 这是 **Linux 内核 (Kernel)** 的漏洞。 + - 漏洞发生的原因是:未正确限制非特权用户在特定条件下附加 `N_GSM0710` 行规解释器(line discipline, ldisc)的能力。虽然创建 GSM 网络本身需要 `CAP_NET_ADMIN` 能力,但在此之前附加该行规解释器并未进行充分的权限检查。 + - 漏洞效果:任何非特权用户都可以附加 `N_GSM0710` ldisc,这可能导致意外的行为或进一步的权限提升风险,尤其是在共享的容器环境中,攻击者可能利用此漏洞绕过预期的隔离机制。通过要求初始命名空间的 `CAP_NET_ADMIN` 权限,修复了这一问题,从而增强了隔离性。 + +cve: ./data/2023/52xxx/CVE-2023-52939.json +In the Linux kernel, the following vulnerability has been resolved: + +mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath() + +As commit 18365225f044 ("hwpoison, memcg: forcibly uncharge LRU pages"), +hwpoison will forcibly uncharg a LRU hwpoisoned page, the folio_memcg +could be NULl, then, mem_cgroup_track_foreign_dirty_slowpath() could +occurs a NULL pointer dereference, let's do not record the foreign +writebacks for folio memcg is null in mem_cgroup_track_foreign_dirty() to +fix it. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 
这是Linux内核的漏洞。该漏洞发生在内存控制组(memcg)相关的代码中,具体是在处理LRU hwpoisoned页面时,由于hwpoison会强制卸载一个LRU hwpoisoned页面,导致folio_memcg可能为NULL。如果调用`mem_cgroup_track_foreign_dirty_slowpath()`函数时未正确检查此情况,则可能会引发NULL指针解引用问题。这种漏洞可能导致系统崩溃或不稳定,尤其是在容器环境中使用内存控制组进行资源限制和监控时,可能会影响容器的隔离性和稳定性。 + +cve: ./data/2023/52xxx/CVE-2023-52940.json +In the Linux kernel, the following vulnerability has been resolved: + +mm: multi-gen LRU: fix crash during cgroup migration + +lru_gen_migrate_mm() assumes lru_gen_add_mm() runs prior to itself. This +isn't true for the following scenario: + + CPU 1 CPU 2 + + clone() + cgroup_can_fork() + cgroup_procs_write() + cgroup_post_fork() + task_lock() + lru_gen_migrate_mm() + task_unlock() + task_lock() + lru_gen_add_mm() + task_unlock() + +And when the above happens, kernel crashes because of linked list +corruption (mm_struct->lru_gen.list). + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关** + 是的,该 CVE 与 cgroup 相关。描述中明确提到 `cgroup_migration` 和 `lru_gen_migrate_mm()` 函数,这些都涉及控制组(cgroup)的管理,而 cgroup 是 Linux 容器实现资源隔离的重要组成部分。 + +2. **漏洞所属程序及分析** + - **程序**:这是 Linux 内核(Kernel)的漏洞。 + - **漏洞发生原因**:在多代 LRU(Least Recently Used)机制中,`lru_gen_migrate_mm()` 函数假设 `lru_gen_add_mm()` 在其之前运行。然而,在特定的竞争条件下(如上述 CPU 1 和 CPU 2 的场景),`lru_gen_migrate_mm()` 可能在 `lru_gen_add_mm()` 之前执行,导致内核崩溃。 + - **效果**:当这种情况发生时,由于链表(`mm_struct->lru_gen.list`)被破坏,内核会崩溃。这可能会影响依赖 cgroup 的容器化环境的稳定性,甚至可能导致整个系统崩溃。 + +总结:该漏洞与 cgroup 相关,属于 Linux 内核的 bug,可能影响容器环境的稳定性。 + +cve: ./data/2023/52xxx/CVE-2023-52942.json +In the Linux kernel, the following vulnerability has been resolved: + +cgroup/cpuset: Fix wrong check in update_parent_subparts_cpumask() + +It was found that the check to see if a partition could use up all +the cpus from the parent cpuset in update_parent_subparts_cpumask() +was incorrect. As a result, it is possible to leave parent with no +effective cpu left even if there are tasks in the parent cpuset. This +can lead to system panic as reported in [1]. 
+ +Fix this probem by updating the check to fail the enabling the partition +if parent's effective_cpus is a subset of the child's cpus_allowed. + +Also record the error code when an error happens in update_prstate() +and add a test case where parent partition and child have the same cpu +list and parent has task. Enabling partition in the child will fail in +this case. + +[1] https://www.spinics.net/lists/cgroups/msg36254.html + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与cgroup相关。具体来说,它涉及`cpuset`子系统,这是cgroup的一个功能模块,用于限制进程可以使用的CPU和内存节点。 + +2. **程序漏洞信息**: + - **程序**:Linux内核(Kernel)。 + - **漏洞原因**:在`update_parent_subparts_cpumask()`函数中,检查逻辑存在缺陷,导致子`cpuset`可能占用父`cpuset`的所有CPU资源,而使父`cpuset`没有有效的CPU资源可供其任务使用。 + - **效果**:如果父`cpuset`中的任务无法访问任何CPU资源,可能会导致系统崩溃(panic)。这种情况尤其可能发生在多租户环境或容器化环境中,因为cgroup通常用于隔离和分配资源。 + +总结:这是一个Linux内核中cgroup子系统的漏洞,可能导致系统崩溃,且与资源隔离密切相关。 + +cve: ./data/2023/5xxx/CVE-2023-5165.json +Docker Desktop before 4.23.0 allows an unprivileged user to bypass Enhanced Container Isolation (ECI) restrictions via the debug shell which remains accessible for a short time window after launching Docker Desktop. The affected functionality is available for Docker Business customers only and assumes an environment where users are not granted local root or Administrator privileges. + +This issue has been fixed in Docker Desktop 4.23.0. + +Affected Docker Desktop versions: from 4.13.0 before 4.23.0. + +analysis: 1. 该 CVE 信息与 container 和隔离相关。 + +2. 这是 Docker Desktop 的漏洞。漏洞发生在 Docker Desktop 的 Enhanced Container Isolation (ECI) 功能中,由于调试 shell 在启动 Docker Desktop 后的一小段时间内仍然可访问,未授权的用户可以利用这一时间窗口绕过 ECI 的限制。此漏洞的效果是允许无特权的用户绕过容器隔离限制,从而可能获得对容器或主机系统的非授权访问权限。该漏洞仅影响 Docker Business 客户,并假设用户没有本地 root 或 Administrator 权限。 + +cve: ./data/2024/0xxx/CVE-2024-0137.json +NVIDIA Container Toolkit contains an improper isolation vulnerability where a specially crafted container image could lead to untrusted code running in the host’s network namespace. 
This vulnerability is present only when the NVIDIA Container Toolkit is configured in a nondefault way. A successful exploit of this vulnerability may lead to denial of service and escalation of privileges. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与容器和隔离相关,具体涉及网络命名空间(network namespace)的隔离问题。 + +2. **程序漏洞信息**: + - **程序**:NVIDIA Container Toolkit + - **漏洞原因**:当NVIDIA Container Toolkit被配置为非默认方式时,可能存在隔离不当的问题。如果使用了特殊构造的容器镜像,可能导致不可信代码在主机的网络命名空间中运行。 + - **效果**: + - 攻击者可能利用此漏洞在主机的网络命名空间中执行恶意代码,破坏隔离性。 + - 成功利用后可能导致拒绝服务(Denial of Service, DoS)或权限提升(Escalation of Privileges)。 + +总结:这是一个与容器隔离相关的漏洞,影响NVIDIA Container Toolkit,可能导致主机网络命名空间的安全性受损。 + +cve: ./data/2024/0xxx/CVE-2024-0443.json +A flaw was found in the blkgs destruction path in block/blk-cgroup.c in the Linux kernel, leading to a cgroup blkio memory leakage problem. When a cgroup is being destroyed, cgroup_rstat_flush() is only called at css_release_work_fn(), which is called when the blkcg reference count reaches 0. This circular dependency will prevent blkcg and some blkgs from being freed after they are made offline. This issue may allow an attacker with a local access to cause system instability, such as an out of memory error. + +analysis: 1. 该 CVE 信息与 cgroup 相关。 + +2. 这是 Linux 内核的漏洞。漏洞发生在块设备控制组(blk-cgroup)的销毁路径中,具体是在 `block/blk-cgroup.c` 文件中。由于在销毁 cgroup 时,`cgroup_rstat_flush()` 只在 `css_release_work_fn()` 中被调用,而该函数仅在 blkcg 引用计数达到 0 时才会触发,导致循环依赖问题。这种情况下,即使 blkcg 和 blkgs 被标记为离线,它们也无法被正确释放,从而引发内存泄漏。 + + 漏洞效果:攻击者可以通过本地访问利用此漏洞,导致系统内存耗尽,进而可能引发系统不稳定或崩溃。 + +cve: ./data/2024/10xxx/CVE-2024-10975.json +Nomad Community and Nomad Enterprise ("Nomad") volume specification is vulnerable to arbitrary cross-namespace volume creation through unauthorized Container Storage Interface (CSI) volume writes. This vulnerability, identified as CVE-2024-10975, is fixed in Nomad Community Edition 1.9.2 and Nomad Enterprise 1.9.2, 1.8.7, and 1.7.15. + +analysis: 1. 
该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是Nomad程序的漏洞,Nomad是一个用于工作负载编排的工具,通常用于管理容器化应用。该漏洞是由于未经授权的Container Storage Interface (CSI)卷写入操作导致的,攻击者可以通过此漏洞实现跨命名空间(namespace)的任意卷创建。这种行为破坏了容器之间的隔离性,可能导致敏感数据泄露或未授权的数据访问。 + +cve: ./data/2024/11xxx/CVE-2024-11075.json +A vulnerability in the Incoming Goods Suite allows a user with unprivileged access to the underlying system (e.g. local or via SSH) a privilege escalation to the administrative level due to the usage of component vendor Docker images running with root permissions. Exploiting this misconfiguration leads to the fact that an attacker can gain administrative control. over the whole system. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是容器实现(Docker)相关的漏洞。漏洞发生的原因是由于Incoming Goods Suite使用了供应商提供的Docker镜像,而这些镜像以root权限运行。这种配置错误使得未授权的用户可以通过本地访问或SSH访问底层系统,并利用此漏洞进行权限提升,最终获得系统的管理员控制权。效果是攻击者可以完全控制整个系统。 + +cve: ./data/2024/12xxx/CVE-2024-12678.json +Nomad Community and Nomad Enterprise ("Nomad") allocations are vulnerable to privilege escalation within a namespace through unredacted workload identity tokens. This vulnerability, identified as CVE-2024-12678, is fixed in Nomad Community Edition 1.9.4 and Nomad Enterprise 1.9.4, 1.8.8, and 1.7.16. + +analysis: 1. 这个CVE信息与namespace相关,因为描述中提到“privilege escalation within a namespace”。 + +2. 这是Nomad程序的漏洞。Nomad是一个用于工作负载编排的工具,支持容器化和非容器化应用的部署与管理。该漏洞发生的原因是由于在namespace内未正确处理或遮蔽工作负载身份令牌(unredacted workload identity tokens),导致攻击者可能利用这些令牌提升权限。其效果是攻击者可以在namespace内进行特权提升,从而可能访问或控制其他资源。 + +N/A + +cve: ./data/2024/21xxx/CVE-2024-21626.json +runc is a CLI tool for spawning and running containers on Linux according to the OCI specification. In runc 1.1.11 and earlier, due to an internal file descriptor leak, an attacker could cause a newly-spawned container process (from runc exec) to have a working directory in the host filesystem namespace, allowing for a container escape by giving access to the host filesystem ("attack 2"). 
The same attack could be used by a malicious image to allow a container process to gain access to the host filesystem through runc run ("attack 1"). Variants of attacks 1 and 2 could be also be used to overwrite semi-arbitrary host binaries, allowing for complete container escapes ("attack 3a" and "attack 3b"). runc 1.1.12 includes patches for this issue. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 namespace、container 和隔离机制密切相关。漏洞涉及 runc 工具在处理容器时未能正确隔离文件描述符和工作目录,导致容器可以访问主机文件系统,破坏了容器与主机之间的隔离。 + +2. **程序漏洞信息及影响后果**: + - **程序**:这是 runc 的漏洞,runc 是一个用于根据 OCI 规范创建和运行容器的 CLI 工具,属于容器实现的一部分。 + - **漏洞发生原因**:由于 runc 在 1.1.11 及更早版本中存在内部文件描述符泄漏问题,导致新生成的容器进程(通过 `runc exec` 或 `runc run`)的工作目录位于主机文件系统命名空间中,而非容器隔离的命名空间内。 + - **效果**: + - 攻击者可以通过此漏洞实现容器逃逸,访问主机文件系统(攻击 1 和攻击 2)。 + - 攻击者还可以利用变种攻击(攻击 3a 和攻击 3b)覆盖主机上的部分二进制文件,进一步获得对主机的完全控制权。 + +cve: ./data/2024/22xxx/CVE-2024-22036.json +A vulnerability has been identified within Rancher where a cluster or node driver can be used to escape the chroot + jail and gain root access to the Rancher container itself. In +production environments, further privilege escalation is possible based +on living off the land within the Rancher container itself. For the test + and development environments, based on a –privileged Docker container, +it is possible to escape the Docker container and gain execution access +on the host system. + + +This issue affects rancher: from 2.7.0 before 2.7.16, from 2.8.0 before 2.8.9, from 2.9.0 before 2.9.3. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是,该漏洞涉及容器逃逸问题,直接影响容器的隔离性。 + +2. 
**程序漏洞分析**: + - **程序**:这是 Rancher 的漏洞,Rancher 是一个容器管理平台,基于 Docker 和 Kubernetes。 + - **漏洞发生原因**:在 Rancher 中,集群或节点驱动程序存在缺陷,攻击者可以利用该缺陷从 chroot 环境中逃逸,并进一步获得 Rancher 容器的 root 权限。 + - **漏洞效果**: + - 在生产环境中,攻击者可以通过“living off the land”(利用目标系统已有的工具和功能)实现进一步的权限提升。 + - 在测试和开发环境中,如果 Rancher 容器以 `--privileged` 模式运行,攻击者可以完全逃逸出 Docker 容器,获取宿主机系统的执行权限,从而破坏容器隔离机制,威胁整个系统的安全性。 + +cve: ./data/2024/23xxx/CVE-2024-23651.json +BuildKit is a toolkit for converting source code to build artifacts in an efficient, expressive and repeatable manner. Two malicious build steps running in parallel sharing the same cache mounts with subpaths could cause a race condition that can lead to files from the host system being accessible to the build container. The issue has been fixed in v0.12.5. Workarounds include, avoiding using BuildKit frontend from an untrusted source or building an untrusted Dockerfile containing cache mounts with --mount=type=cache,source=... options. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 该 CVE 信息与容器和隔离相关。问题涉及 BuildKit 中的构建步骤在共享缓存挂载时出现竞争条件,可能导致主机文件系统被访问,这直接影响容器的隔离性。 + +2. **程序漏洞分析**: + - **这是什么程序的漏洞**:这是 BuildKit 的漏洞。BuildKit 是一个用于高效构建容器镜像的工具,通常与 Docker 等容器技术集成使用。 + - **漏洞如何发生**:当两个恶意构建步骤并行运行且共享相同的缓存挂载(带有子路径)时,会出现竞争条件。这种竞争条件可能导致主机系统的文件被容器内的进程访问。 + - **漏洞效果**:此漏洞破坏了容器与主机之间的隔离性,攻击者可能利用此漏洞访问或泄露主机上的敏感文件,从而对主机系统造成潜在威胁。 + +cve: ./data/2024/23xxx/CVE-2024-23652.json +BuildKit is a toolkit for converting source code to build artifacts in an efficient, expressive and repeatable manner. A malicious BuildKit frontend or Dockerfile using RUN --mount could trick the feature that removes empty files created for the mountpoints into removing a file outside the container, from the host system. The issue has been fixed in v0.12.5. Workarounds include avoiding using BuildKit frontends from an untrusted source or building an untrusted Dockerfile containing RUN --mount feature. + +analysis: 1. 
**是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与容器相关。它涉及BuildKit在处理`RUN --mount`功能时的行为,可能导致容器内操作影响到主机系统文件,破坏了容器与主机之间的隔离。 + +2. **程序漏洞分析**: + - **程序**:这是BuildKit工具的漏洞,而非内核Kernel或容器内部运行的应用。 + - **漏洞发生原因**:BuildKit在实现`RUN --mount`功能时,存在逻辑缺陷。当创建用于挂载点的空文件后,这些文件会被删除。然而,由于路径处理不当,攻击者可以利用恶意构造的BuildKit前端或Dockerfile,诱使BuildKit删除主机系统上的文件,而非仅限于容器内的文件。 + - **效果**:此漏洞允许攻击者通过精心构造的Dockerfile或BuildKit前端,在容器中执行操作时删除主机系统上的任意文件,从而破坏容器隔离性,对主机系统造成潜在的安全威胁。 + +cve: ./data/2024/24xxx/CVE-2024-24557.json +Moby is an open-source project created by Docker to enable software containerization. The classic builder cache system is prone to cache poisoning if the image is built FROM scratch. Also, changes to some instructions (most important being HEALTHCHECK and ONBUILD) would not cause a cache miss. An attacker with the knowledge of the Dockerfile someone is using could poison their cache by making them pull a specially crafted image that would be considered as a valid cache candidate for some build steps. 23.0+ users are only affected if they explicitly opted out of Buildkit (DOCKER_BUILDKIT=0 environment variable) or are using the /build API endpoint. All users on versions older than 23.0 could be impacted. Image build API endpoint (/build) and ImageBuild function from github.com/docker/docker/client is also affected as it the uses classic builder by default. Patches are included in 24.0.9 and 25.0.2 releases. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与容器技术相关,具体涉及 Moby(Docker 的开源项目)中的经典构建器缓存系统。 + +2. 
**程序漏洞分析**: + - **程序**:这是 Docker 容器实现中的漏洞,具体影响 Moby(Docker 的开源组件)。 + - **漏洞发生原因**:Moby 的经典构建器缓存系统存在设计缺陷,当从 `scratch` 构建镜像时,可能遭受缓存投毒攻击。此外,某些指令(如 `HEALTHCHECK` 和 `ONBUILD`)的更改不会导致缓存失效,从而允许攻击者通过精心构造的镜像污染构建缓存。 + - **效果**:攻击者可以利用此漏洞,在目标用户的构建过程中插入恶意代码或配置,进而影响生成的容器镜像的安全性。这种污染可能在后续的容器运行中导致未授权的行为或安全风险。 + +总结:该 CVE 与容器技术相关,影响 Docker 的经典构建器缓存机制,可能导致缓存投毒攻击,影响容器镜像的安全性。 + +cve: ./data/2024/24xxx/CVE-2024-24760.json +mailcow is a dockerized email package, with multiple containers linked in one bridged network. A security vulnerability has been identified in mailcow affecting versions < 2024-01c. This vulnerability potentially allows attackers on the same subnet to connect to exposed ports of a Docker container, even when the port is bound to 127.0.0.1. The vulnerability has been addressed by implementing additional iptables/nftables rules. These rules drop packets for Docker containers on ports 3306, 6379, 8983, and 12345, where the input interface is not `br-mailcow` and the output interface is `br-mailcow`. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与容器和隔离相关。它涉及到 Docker 容器网络配置中的漏洞,具体是关于容器之间的网络隔离问题。 + +2. **程序漏洞分析**: + - **程序**:这是 mailcow 的漏洞,mailcow 是一个基于 Docker 的电子邮件解决方案。因此,该漏洞主要影响的是容器实现(Docker)以及容器内部运行的应用。 + - **漏洞发生原因**:在 mailcow 的默认配置中,即使某些服务的端口绑定到 `127.0.0.1`(通常表示仅允许本地访问),这些端口仍然可能被同一子网内的攻击者访问。这是因为 Docker 的桥接网络 (`br-mailcow`) 没有正确限制外部流量。 + - **效果**:此漏洞可能导致同一子网内的攻击者能够未经授权访问本应仅限于本地访问的服务(如数据库、缓存服务等),从而泄露敏感数据或进一步利用其他漏洞进行攻击。 + +总结:该 CVE 与容器和隔离相关,涉及 mailcow 的 Docker 网络配置问题,可能导致容器间的网络隔离失效,使攻击者可以访问本应隔离的服务端口。 + +cve: ./data/2024/26xxx/CVE-2024-26634.json +In the Linux kernel, the following vulnerability has been resolved: + +net: fix removing a namespace with conflicting altnames + +Mark reports a BUG() when a net namespace is removed. + + kernel BUG at net/core/dev.c:11520! + +Physical interfaces moved outside of init_net get "refunded" +to init_net when that namespace disappears. 
The main interface +name may get overwritten in the process if it would have +conflicted. We need to also discard all conflicting altnames. +Recent fixes addressed ensuring that altnames get moved +with the main interface, which surfaced this problem. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,这个 CVE 与 namespace 相关。具体来说,它涉及网络命名空间(net namespace)的删除操作。 + +2. **漏洞所在的程序及分析**: + - 这是 Linux 内核(Kernel)中的一个漏洞。 + - 漏洞发生的原因是在删除网络命名空间时,如果存在冲突的备用名称(altnames),内核没有正确处理这些冲突,导致可能触发内核 BUG。 + - 具体效果是,当网络命名空间被移除时,物理接口可能会被“退还”到初始网络命名空间(init_net)。如果在退还过程中,接口名称或备用名称发生冲突,则可能导致内核崩溃(kernel BUG)。 + - 此问题的影响在于,攻击者可能利用此漏洞导致系统不稳定或拒绝服务(DoS)。 + +总结:这是一个与网络命名空间相关的 Linux 内核漏洞,可能导致内核崩溃。 + +cve: ./data/2024/26xxx/CVE-2024-26865.json +In the Linux kernel, the following vulnerability has been resolved: + +rds: tcp: Fix use-after-free of net in reqsk_timer_handler(). + +syzkaller reported a warning of netns tracker [0] followed by KASAN +splat [1] and another ref tracker warning [1]. + +syzkaller could not find a repro, but in the log, the only suspicious +sequence was as follows: + + 18:26:22 executing program 1: + r0 = socket$inet6_mptcp(0xa, 0x1, 0x106) + ... + connect$inet6(r0, &(0x7f0000000080)={0xa, 0x4001, 0x0, @loopback}, 0x1c) (async) + +The notable thing here is 0x4001 in connect(), which is RDS_TCP_PORT. + +So, the scenario would be: + + 1. unshare(CLONE_NEWNET) creates a per netns tcp listener in + rds_tcp_listen_init(). + 2. syz-executor connect()s to it and creates a reqsk. + 3. syz-executor exit()s immediately. + 4. netns is dismantled. [0] + 5. reqsk timer is fired, and UAF happens while freeing reqsk. [1] + 6. listener is freed after RCU grace period. [2] + +Basically, reqsk assumes that the listener guarantees netns safety +until all reqsk timers are expired by holding the listener's refcount. +However, this was not the case for kernel sockets. 
+ +Commit 740ea3c4a0b2 ("tcp: Clean up kernel listener's reqsk in +inet_twsk_purge()") fixed this issue only for per-netns ehash. + +Let's apply the same fix for the global ehash. + +[0]: +ref_tracker: net notrefcnt@0000000065449cc3 has 1/1 users at + sk_alloc (./include/net/net_namespace.h:337 net/core/sock.c:2146) + inet6_create (net/ipv6/af_inet6.c:192 net/ipv6/af_inet6.c:119) + __sock_create (net/socket.c:1572) + rds_tcp_listen_init (net/rds/tcp_listen.c:279) + rds_tcp_init_net (net/rds/tcp.c:577) + ops_init (net/core/net_namespace.c:137) + setup_net (net/core/net_namespace.c:340) + copy_net_ns (net/core/net_namespace.c:497) + create_new_namespaces (kernel/nsproxy.c:110) + unshare_nsproxy_namespaces (kernel/nsproxy.c:228 (discriminator 4)) + ksys_unshare (kernel/fork.c:3429) + __x64_sys_unshare (kernel/fork.c:3496) + do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) + entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) +... +WARNING: CPU: 0 PID: 27 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179) + +[1]: +BUG: KASAN: slab-use-after-free in inet_csk_reqsk_queue_drop (./include/net/inet_hashtables.h:180 net/ipv4/inet_connection_sock.c:952 net/ipv4/inet_connection_sock.c:966) +Read of size 8 at addr ffff88801b370400 by task swapper/0/0 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +Call Trace: + + dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) + print_report (mm/kasan/report.c:378 mm/kasan/report.c:488) + kasan_report (mm/kasan/report.c:603) + inet_csk_reqsk_queue_drop (./include/net/inet_hashtables.h:180 net/ipv4/inet_connection_sock.c:952 net/ipv4/inet_connection_sock.c:966) + reqsk_timer_handler (net/ipv4/inet_connection_sock.c:979 net/ipv4/inet_connection_sock.c:1092) + call_timer_fn (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/timer.h:127 kernel/time/timer.c:1701) + 
__run_timers.part.0 (kernel/time/timer.c:1752 kernel/time/timer.c:2038) + run_timer_softirq (kernel/time/timer.c:2053) + __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:554) + irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632 kernel/softirq.c:644) + sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1076 (discriminator 14)) + + +Allocated by task 258 on cpu 0 at 83.612050s: + kasan_save_stack (mm/kasan/common.c:48) + kasan_save_track (mm/kasan/common.c:68) + __kasan_slab_alloc (mm/kasan/common.c:343) + kmem_cache_alloc (mm/slub.c:3813 mm/slub.c:3860 mm/slub.c:3867) + copy_net_ns (./include/linux/slab.h:701 net/core/net_namespace.c:421 net/core/net_namespace.c:480) + create_new_namespaces (kernel/nsproxy.c:110) + unshare_nsproxy_name +---truncated--- + +analysis: 1. **是否与 namespace、cgroup、container 或容器隔离相关**: + 是的,该 CVE 与 namespace 和隔离相关。具体来说,它涉及 `CLONE_NEWNET` 系统调用创建一个新的网络命名空间(netns),并在该命名空间中处理资源引用计数时出现的问题。 + +2. **这是什么程序的漏洞**: + 这是 Linux 内核(Kernel)中的漏洞。漏洞发生在 RDS(Reliable Datagram Sockets)协议的 TCP 实现中,当在网络命名空间被销毁后,仍然存在对已释放资源的访问(Use-After-Free, UAF)。 + + **漏洞发生的原因**: + - 当通过 `unshare(CLONE_NEWNET)` 创建一个新的网络命名空间时,RDS 协议会初始化一个监听器(listener)。 + - 如果在这个命名空间中创建了一个请求套接字(reqsk),然后迅速退出进程,导致网络命名空间被销毁。 + - 在网络命名空间被销毁后,定时器触发时尝试释放 reqsk,但由于引用计数管理不当,此时已经发生了 Use-After-Free。 + - 结果是内核可能会崩溃或行为异常。 + +3. **漏洞的效果**: + - 攻击者可能利用此漏洞导致内核崩溃(拒绝服务攻击,DoS)。 + - 在某些情况下,可能进一步被利用来提升权限或执行任意代码。 + +cve: ./data/2024/29xxx/CVE-2024-29018.json +Moby is an open source container framework that is a key component of Docker Engine, Docker Desktop, and other distributions of container tooling or runtimes. Moby's networking implementation allows for many networks, each with their own IP address range and gateway, to be defined. This feature is frequently referred to as custom networks, as each network can have a different driver, set of parameters and thus behaviors. 
When creating a network, the `--internal` flag is used to designate a network as _internal_. The `internal` attribute in a docker-compose.yml file may also be used to mark a network _internal_, and other API clients may specify the `internal` parameter as well. + +When containers with networking are created, they are assigned unique network interfaces and IP addresses. The host serves as a router for non-internal networks, with a gateway IP that provides SNAT/DNAT to/from container IPs. + +Containers on an internal network may communicate between each other, but are precluded from communicating with any networks the host has access to (LAN or WAN) as no default route is configured, and firewall rules are set up to drop all outgoing traffic. Communication with the gateway IP address (and thus appropriately configured host services) is possible, and the host may communicate with any container IP directly. + +In addition to configuring the Linux kernel's various networking features to enable container networking, `dockerd` directly provides some services to container networks. Principal among these is serving as a resolver, enabling service discovery, and resolution of names from an upstream resolver. + +When a DNS request for a name that does not correspond to a container is received, the request is forwarded to the configured upstream resolver. This request is made from the container's network namespace: the level of access and routing of traffic is the same as if the request was made by the container itself. + +As a consequence of this design, containers solely attached to an internal network will be unable to resolve names using the upstream resolver, as the container itself is unable to communicate with that nameserver. Only the names of containers also attached to the internal network are able to be resolved. + +Many systems run a local forwarding DNS resolver. 
As the host and any containers have separate loopback devices, a consequence of the design described above is that containers are unable to resolve names from the host's configured resolver, as they cannot reach these addresses on the host loopback device. To bridge this gap, and to allow containers to properly resolve names even when a local forwarding resolver is used on a loopback address, `dockerd` detects this scenario and instead forward DNS requests from the host namework namespace. The loopback resolver then forwards the requests to its configured upstream resolvers, as expected. + +Because `dockerd` forwards DNS requests to the host loopback device, bypassing the container network namespace's normal routing semantics entirely, internal networks can unexpectedly forward DNS requests to an external nameserver. By registering a domain for which they control the authoritative nameservers, an attacker could arrange for a compromised container to exfiltrate data by encoding it in DNS queries that will eventually be answered by their nameservers. + +Docker Desktop is not affected, as Docker Desktop always runs an internal resolver on a RFC 1918 address. + +Moby releases 26.0.0, 25.0.4, and 23.0.11 are patched to prevent forwarding any DNS requests from internal networks. As a workaround, run containers intended to be solely attached to internal networks with a custom upstream address, which will force all upstream DNS queries to be resolved from the container's network namespace. + +analysis: ### 分析结果: + +1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + - 是的,这个 CVE 与 namespace 和 container 隔离密切相关。问题发生在 Docker 的 `dockerd` 在处理内部网络(internal network)时,绕过了容器网络命名空间的正常路由规则,导致内部网络的 DNS 请求被转发到主机的 loopback 设备,从而破坏了内部网络的隔离性。 + +2. 
**这是什么程序的漏洞:** + - 这是 **Moby(Docker Engine 的核心组件)** 的漏洞。 + - **漏洞发生原因:** + 当容器仅连接到内部网络(internal network)时,`dockerd` 会检测到主机上运行的本地转发 DNS 解析器,并将 DNS 请求从容器的网络命名空间转发到主机的 loopback 设备。这种设计破坏了内部网络的隔离性,因为内部网络的容器本应无法访问外部网络或主机上的非 loopback 网络资源。 + - **漏洞效果:** + 攻击者可以通过控制一个权威域名服务器,诱导受影响的容器发送包含敏感数据的 DNS 查询,从而实现数据外泄。这种行为违背了内部网络的设计初衷,即防止容器与主机外部网络通信。 + +3. **结论:** + - 该漏洞影响 Moby 的容器网络实现,特别是内部网络的 DNS 解析逻辑。 + - 它破坏了容器网络命名空间的隔离性,可能导致敏感数据通过 DNS 查询泄露。 + +cve: ./data/2024/29xxx/CVE-2024-29967.json +In Brocade SANnav before Brocade SANnav v2.31 and v2.3.0a, it was observed that Docker instances inside the appliance have insecure mount points, allowing reading and writing access to sensitive files. The vulnerability could allow a sudo privileged user on the host OS to read and write access to these files. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,该CVE信息与容器(Docker)和隔离相关。 + +2. **程序漏洞分析**: + - 这是容器实现(Docker)相关的漏洞。 + - 漏洞发生的原因是Docker实例在Brocade SANnav设备内部有不安全的挂载点,这些挂载点允许对敏感文件进行读写访问。 + - 效果:该漏洞可能使主机操作系统上具有sudo权限的用户能够读取和写入这些敏感文件,从而破坏容器的隔离性,并可能导致敏感信息泄露或系统被进一步攻击。 + +cve: ./data/2024/31xxx/CVE-2024-31419.json +An information disclosure flaw was found in OpenShift Virtualization. The DownwardMetrics feature was introduced to expose host metrics to virtual machine guests and is enabled by default. This issue could expose limited host metrics of a node to any guest in any namespace without being explicitly enabled by an administrator. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,这个 CVE 与 namespace 和隔离相关。问题涉及将主机的指标(host metrics)暴露给虚拟机客人(guests),并且这种暴露是跨命名空间(namespace)的,而没有适当的隔离控制。 + +2. 
**程序漏洞分析**: + - **程序**:这是 OpenShift Virtualization 的漏洞,OpenShift Virtualization 是基于 Kubernetes 的平台,用于管理虚拟机和容器化工作负载。 + - **漏洞发生原因**:DownwardMetrics 功能被设计用来向虚拟机客人暴露主机的指标,但该功能默认启用,并且未正确限制访问权限。结果是,即使管理员没有显式配置,任何命名空间中的虚拟机客人都可以访问这些主机指标。 + - **效果**:此漏洞可能导致敏感的主机指标信息(如 CPU 使用率、内存使用情况等)被无意中泄露给未经授权的虚拟机客人。这可能帮助攻击者收集有关主机环境的信息,从而为进一步攻击提供便利。 + +cve: ./data/2024/31xxx/CVE-2024-31989.json +Argo CD is a declarative, GitOps continuous delivery tool for Kubernetes. It has been discovered that an unprivileged pod in a different namespace on the same cluster could connect to the Redis server on port 6379. Despite having installed the latest version of the VPC CNI plugin on the EKS cluster, it requires manual enablement through configuration to enforce network policies. This raises concerns that many clients might unknowingly have open access to their Redis servers. This vulnerability could lead to Privilege Escalation to the level of cluster controller, or to information leakage, affecting anyone who does not have strict access controls on their Redis instance. This issue has been patched in version(s) 2.8.19, 2.9.15 and 2.10.10. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,此 CVE 与 namespace 和容器隔离相关。问题涉及 Kubernetes 集群中不同命名空间(namespace)之间的网络隔离不足,导致未授权的 Pod 可以访问 Redis 服务器。 + +2. **漏洞所属程序及影响分析**: + - **程序**:这是 Argo CD 的漏洞,Argo CD 是一个运行在 Kubernetes 上的 GitOps 工具。 + - **漏洞发生原因**:尽管安装了 VPC CNI 插件,但默认情况下并未强制执行网络策略(Network Policies),这使得不同命名空间中的未特权 Pod 能够连接到 Redis 服务器。 + - **效果**: + - **权限提升**:攻击者可能通过此漏洞获得对 Redis 服务器的访问权限,并进一步利用 Redis 的功能实现集群级别的控制。 + - **信息泄露**:如果 Redis 中存储了敏感数据,这些数据可能会被未授权访问。 + - **影响范围**:所有未正确配置 Redis 访问控制的 Argo CD 用户都可能受到影响。 + +cve: ./data/2024/31xxx/CVE-2024-31994.json +Mealie is a self hosted recipe manager and meal planner. Prior to 1.4.0, an attacker can point the image request to an arbitrarily large file. Mealie will attempt to retrieve this file in whole. 
If it can be retrieved, it may be stored on the file system in whole (leading to possible disk consumption), however the more likely scenario given resource limitations is that the container will OOM during file retrieval if the target file size is greater than the allocated memory of the container. At best this can be used to force the container to infinitely restart due to OOM (if so configured in `docker-compose.yml), or at worst this can be used to force the Mealie container to crash and remain offline. In the event that the file can be retrieved, the lack of rate limiting on this endpoint also permits an attacker to generate ongoing requests to any target of their choice, potentially contributing to an external-facing DoS attack. This vulnerability is fixed in 1.4.0. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与容器相关。问题描述中明确提到容器可能会由于内存耗尽 (OOM) 而崩溃或无限重启,并且涉及到容器资源限制的配置 (`docker-compose.yml`)。 + +2. **程序漏洞分析**: + - **程序**:这是一个名为 Mealie 的应用程序的漏洞。Mealie 是一个自托管的食谱管理器和餐饮计划工具。 + - **漏洞发生原因**:在版本 1.4.0 之前,Mealie 对图像请求没有进行适当的大小限制或验证,允许攻击者指定一个任意大的文件作为图像请求的目标。如果目标文件可以被检索到,Mealie 会尝试将其完整地存储在文件系统上,这可能导致磁盘空间耗尽。更常见的情况是,由于容器内存资源限制,当目标文件大小超过容器分配的内存时,容器会发生 OOM(Out of Memory)错误。 + - **漏洞效果**: + 1. 如果容器配置为在 OOM 后自动重启,则攻击者可以通过触发 OOM 错误使容器无限重启,导致服务不可用。 + 2. 如果容器没有自动重启配置,则攻击者可以直接导致容器崩溃并保持离线状态。 + 3. 此外,由于该端点缺乏速率限制,攻击者还可以通过生成大量请求来发起对外部目标的 DoS 攻击。 + +总结:这是一个容器内部运行的应用程序(Mealie)的漏洞,主要影响容器的稳定性和资源使用,可能引发 OOM 或 DoS 攻击。 + +cve: ./data/2024/32xxx/CVE-2024-32473.json +Moby is an open source container framework that is a key component of Docker Engine, Docker Desktop, and other distributions of container tooling or runtimes. In 26.0.0, IPv6 is not disabled on network interfaces, including those belonging to networks where `--ipv6=false`. An container with an `ipvlan` or `macvlan` interface will normally be configured to share an external network link with the host machine. 
Because of this direct access, (1) Containers may be able to communicate with other hosts on the local network over link-local IPv6 addresses, (2) if router advertisements are being broadcast over the local network, containers may get SLAAC-assigned addresses, and (3) the interface will be a member of IPv6 multicast groups. This means interfaces in IPv4-only networks present an unexpectedly and unnecessarily increased attack surface. The issue is patched in 26.0.2. To completely disable IPv6 in a container, use `--sysctl=net.ipv6.conf.all.disable_ipv6=1` in the `docker create` or `docker run` command. Or, in the service configuration of a `compose` file. + +analysis: ### 分析结果 + +1. **是否与 namespace、cgroup、container 或者容器、隔离相关** + 是的,该 CVE 与容器和隔离相关。具体来说,它涉及容器网络接口的配置问题,特别是 `ipvlan` 和 `macvlan` 类型的网络接口,这些接口允许容器直接访问主机外部网络。 + +2. **漏洞所属程序及影响分析** + - **程序**: 这是 Moby(Docker 的核心组件)中的漏洞。 + - **漏洞发生原因**: 在 Moby 26.0.0 版本中,即使在创建网络时明确设置了 `--ipv6=false`,IPv6 仍然未被正确禁用。这导致使用 `ipvlan` 或 `macvlan` 类型网络接口的容器在网络接口上保留了 IPv6 功能。 + - **效果**: + 1. 容器可能通过链路本地 IPv6 地址与其他本地网络中的主机通信,即使该网络被配置为仅支持 IPv4。 + 2. 如果本地网络广播路由器通告(Router Advertisements),容器可能会通过无状态地址自动配置(SLAAC)获得 IPv6 地址。 + 3. 容器的网络接口会成为 IPv6 多播组的成员,增加了潜在的攻击面。 + - **后果**: 该漏洞破坏了预期的网络隔离,使得原本应该仅限于 IPv4 的网络暴露于 IPv6 流量,从而增加了攻击的可能性。 + +### 结论 +该 CVE 与容器和隔离相关,属于 Moby 的漏洞,影响容器的网络配置,可能导致意外的网络暴露和攻击面增加。 + +cve: ./data/2024/35xxx/CVE-2024-35139.json +IBM Security Access Manager Docker 10.0.0.0 through 10.0.7.1 could allow a local user to obtain sensitive information from the container due to incorrect default permissions. IBM X-Force ID: 292415. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 
这是IBM Security Access Manager Docker的漏洞。该漏洞发生在容器默认权限配置不正确的情况下,导致本地用户可以从容器中获取敏感信息。这是一个容器实现(Docker)相关的漏洞,其效果是可能导致敏感信息泄露。 + +cve: ./data/2024/35xxx/CVE-2024-35846.json +In the Linux kernel, the following vulnerability has been resolved: + +mm: zswap: fix shrinker NULL crash with cgroup_disable=memory + +Christian reports a NULL deref in zswap that he bisected down to the zswap +shrinker. The issue also cropped up in the bug trackers of libguestfs [1] +and the Red Hat bugzilla [2]. + +The problem is that when memcg is disabled with the boot time flag, the +zswap shrinker might get called with sc->memcg == NULL. This is okay in +many places, like the lruvec operations. But it crashes in +memcg_page_state() - which is only used due to the non-node accounting of +cgroup's the zswap memory to begin with. + +Nhat spotted that the memcg can be NULL in the memcg-disabled case, and I +was then able to reproduce the crash locally as well. + +[1] https://github.com/libguestfs/libguestfs/issues/139 +[2] https://bugzilla.redhat.com/show_bug.cgi?id=2275252 + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,该CVE与cgroup(控制组)相关。 + +2. **程序漏洞分析**: + - 这是**Linux内核(Kernel)**的漏洞。 + - 漏洞发生在`zswap`模块的`shrinker`功能中。当系统使用`cgroup_disable=memory`引导参数禁用内存控制组(memcg)时,`zswap shrinker`可能会被调用,并且`sc->memcg`为`NULL`。在这种情况下,`memcg_page_state()`函数会尝试访问`NULL`指针,从而导致崩溃。 + - **效果**:此漏洞可能导致系统在特定条件下发生空指针解引用崩溃,影响系统的稳定性和可用性。特别是对于启用了`zswap`并禁用了内存控制组的系统,可能引发不可预期的内核崩溃。 + +总结:这是一个与cgroup相关的Linux内核漏洞,涉及`zswap`模块的`shrinker`功能,在特定配置下会导致内核崩溃。 + +cve: ./data/2024/35xxx/CVE-2024-35894.json +In the Linux kernel, the following vulnerability has been resolved: + +mptcp: prevent BPF accessing lowat from a subflow socket. 
+ +Alexei reported the following splat: + + WARNING: CPU: 32 PID: 3276 at net/mptcp/subflow.c:1430 subflow_data_ready+0x147/0x1c0 + Modules linked in: dummy bpf_testmod(O) [last unloaded: bpf_test_no_cfi(O)] + CPU: 32 PID: 3276 Comm: test_progs Tainted: GO 6.8.0-12873-g2c43c33bfd23 + Call Trace: + + mptcp_set_rcvlowat+0x79/0x1d0 + sk_setsockopt+0x6c0/0x1540 + __bpf_setsockopt+0x6f/0x90 + bpf_sock_ops_setsockopt+0x3c/0x90 + bpf_prog_509ce5db2c7f9981_bpf_test_sockopt_int+0xb4/0x11b + bpf_prog_dce07e362d941d2b_bpf_test_socket_sockopt+0x12b/0x132 + bpf_prog_348c9b5faaf10092_skops_sockopt+0x954/0xe86 + __cgroup_bpf_run_filter_sock_ops+0xbc/0x250 + tcp_connect+0x879/0x1160 + tcp_v6_connect+0x50c/0x870 + mptcp_connect+0x129/0x280 + __inet_stream_connect+0xce/0x370 + inet_stream_connect+0x36/0x50 + bpf_trampoline_6442491565+0x49/0xef + inet_stream_connect+0x5/0x50 + __sys_connect+0x63/0x90 + __x64_sys_connect+0x14/0x20 + +The root cause of the issue is that bpf allows accessing mptcp-level +proto_ops from a tcp subflow scope. + +Fix the issue detecting the problematic call and preventing any action. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + N/A + +2. **程序漏洞分析**: + - 这是 **Linux 内核 (Kernel)** 的一个漏洞,具体发生在 MultiPath TCP (MPTCP) 模块中。 + - **漏洞发生原因**:BPF 程序可以访问到 MPTCP 层级的 `proto_ops`,而这通常应该被限制在 TCP 子流的作用域内。这种不当的访问可能导致未定义行为或系统崩溃(如报告中的警告信息所示)。 + - **效果**:攻击者可能利用此漏洞触发内核崩溃(导致拒绝服务攻击),或者潜在地通过 BPF 程序获取敏感信息或执行未经授权的操作。 + +3. **总结**: + N/A + +cve: ./data/2024/35xxx/CVE-2024-35896.json +In the Linux kernel, the following vulnerability has been resolved: + +netfilter: validate user input for expected length + +I got multiple syzbot reports showing old bugs exposed +by BPF after commit 20f2505fb436 ("bpf: Try to avoid kzalloc +in cgroup/{s,g}etsockopt") + +setsockopt() @optlen argument should be taken into account +before copying data. 
+ + BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] + BUG: KASAN: slab-out-of-bounds in copy_from_sockptr include/linux/sockptr.h:55 [inline] + BUG: KASAN: slab-out-of-bounds in do_replace net/ipv4/netfilter/ip_tables.c:1111 [inline] + BUG: KASAN: slab-out-of-bounds in do_ipt_set_ctl+0x902/0x3dd0 net/ipv4/netfilter/ip_tables.c:1627 +Read of size 96 at addr ffff88802cd73da0 by task syz-executor.4/7238 + +CPU: 1 PID: 7238 Comm: syz-executor.4 Not tainted 6.9.0-rc2-next-20240403-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 + print_address_description mm/kasan/report.c:377 [inline] + print_report+0x169/0x550 mm/kasan/report.c:488 + kasan_report+0x143/0x180 mm/kasan/report.c:601 + kasan_check_range+0x282/0x290 mm/kasan/generic.c:189 + __asan_memcpy+0x29/0x70 mm/kasan/shadow.c:105 + copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] + copy_from_sockptr include/linux/sockptr.h:55 [inline] + do_replace net/ipv4/netfilter/ip_tables.c:1111 [inline] + do_ipt_set_ctl+0x902/0x3dd0 net/ipv4/netfilter/ip_tables.c:1627 + nf_setsockopt+0x295/0x2c0 net/netfilter/nf_sockopt.c:101 + do_sock_setsockopt+0x3af/0x720 net/socket.c:2311 + __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 + __do_sys_setsockopt net/socket.c:2343 [inline] + __se_sys_setsockopt net/socket.c:2340 [inline] + __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 + do_syscall_64+0xfb/0x240 + entry_SYSCALL_64_after_hwframe+0x72/0x7a +RIP: 0033:0x7fd22067dde9 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007fd21f9ff0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 +RAX: ffffffffffffffda RBX: 00007fd2207abf80 RCX: 00007fd22067dde9 +RDX: 
0000000000000040 RSI: 0000000000000000 RDI: 0000000000000003 +RBP: 00007fd2206ca47a R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000020000880 R11: 0000000000000246 R12: 0000000000000000 +R13: 000000000000000b R14: 00007fd2207abf80 R15: 00007ffd2d0170d8 + + +Allocated by task 7238: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 + poison_kmalloc_redzone mm/kasan/common.c:370 [inline] + __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:387 + kasan_kmalloc include/linux/kasan.h:211 [inline] + __do_kmalloc_node mm/slub.c:4069 [inline] + __kmalloc_noprof+0x200/0x410 mm/slub.c:4082 + kmalloc_noprof include/linux/slab.h:664 [inline] + __cgroup_bpf_run_filter_setsockopt+0xd47/0x1050 kernel/bpf/cgroup.c:1869 + do_sock_setsockopt+0x6b4/0x720 net/socket.c:2293 + __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 + __do_sys_setsockopt net/socket.c:2343 [inline] + __se_sys_setsockopt net/socket.c:2340 [inline] + __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 + do_syscall_64+0xfb/0x240 + entry_SYSCALL_64_after_hwframe+0x72/0x7a + +The buggy address belongs to the object at ffff88802cd73da0 + which belongs to the cache kmalloc-8 of size 8 +The buggy address is located 0 bytes inside of + allocated 1-byte region [ffff88802cd73da0, ffff88802cd73da1) + +The buggy address belongs to the physical page: +page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88802cd73020 pfn:0x2cd73 +flags: 0xfff80000000000(node=0|zone=1|lastcpupid=0xfff) +page_type: 0xffffefff(slab) +raw: 00fff80000000000 ffff888015041280 dead000000000100 dead000000000122 +raw: ffff88802cd73020 000000008080007f 00000001ffffefff 00 +---truncated--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,此漏洞与 `cgroup` 和 `BPF` 相关。从描述中可以看到,漏洞涉及 `kernel/bpf/cgroup.c` 文件中的代码路径 (`__cgroup_bpf_run_filter_setsockopt`),这表明它与 cgroup 的 BPF 策略执行有关。 + +2. 
**这是什么程序的漏洞,如何发生,有何效果**: + - **程序**:Linux 内核 (Kernel)。 + - **漏洞原因**:在处理 `setsockopt()` 调用时,内核未正确验证用户提供的 `@optlen` 参数长度,导致在调用 `copy_from_sockptr_offset` 函数时可能发生越界读取(slab-out-of-bounds)。此问题被 BPF 程序触发,特别是在 cgroup 的 sockopt 策略应用过程中。 + - **效果**:攻击者可以通过精心构造的 `setsockopt()` 调用来触发内存越界访问,可能导致信息泄露或系统崩溃。由于此漏洞发生在内核空间,攻击者可能利用该漏洞进一步提升权限或破坏系统稳定性。 + +总结:此 CVE 与 cgroup 和 BPF 相关,是 Linux 内核中的一个漏洞,可能导致内存越界读取,进而引发信息泄露或系统崩溃。 + +cve: ./data/2024/35xxx/CVE-2024-35934.json +In the Linux kernel, the following vulnerability has been resolved: + +net/smc: reduce rtnl pressure in smc_pnet_create_pnetids_list() + +Many syzbot reports show extreme rtnl pressure, and many of them hint +that smc acquires rtnl in netns creation for no good reason [1] + +This patch returns early from smc_pnet_net_init() +if there is no netdevice yet. + +I am not even sure why smc_pnet_create_pnetids_list() even exists, +because smc_pnet_netdev_event() is also calling +smc_pnet_add_base_pnetid() when handling NETDEV_UP event. + +[1] extract of typical syzbot reports + +2 locks held by syz-executor.3/12252: + #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 +2 locks held by syz-executor.4/12253: + #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 +2 locks held by syz-executor.1/12257: + #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: 
smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 +2 locks held by syz-executor.2/12261: + #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 +2 locks held by syz-executor.0/12265: + #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 +2 locks held by syz-executor.3/12268: + #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 +2 locks held by syz-executor.4/12271: + #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 +2 locks held by syz-executor.1/12274: + #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 
net/smc/smc_pnet.c:878 +2 locks held by syz-executor.2/12280: + #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] + #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与 namespace 相关。具体来说,问题出现在 `net/smc` 模块中与网络命名空间(netns)创建相关的代码路径中。`smc_pnet_create_pnetids_list()` 函数在创建网络命名空间时获取了不必要的 rtnl 锁,导致性能压力和潜在的竞争条件。 + +2. **这是什么程序的漏洞:** + 这是 Linux 内核(Kernel)中的漏洞。 + - **漏洞发生原因:** 在网络命名空间创建过程中,`smc_pnet_net_init()` 函数调用了 `smc_pnet_create_pnetids_list()`,而后者获取了 rtnl 锁。然而,在某些情况下(例如尚未存在网络设备时),这种锁的获取是没有必要的。这会导致在高并发场景下,rtnl 锁的压力显著增加,从而影响系统性能。 + - **效果:** 该漏洞可能导致系统在创建大量网络命名空间时出现性能瓶颈,尤其是在容器化环境中(如 Docker 或 Kubernetes),由于频繁创建和销毁网络命名空间,可能会加剧这一问题。此外,极端情况下可能引发死锁或资源耗尽的情况。 + +cve: ./data/2024/35xxx/CVE-2024-35974.json +In the Linux kernel, the following vulnerability has been resolved: + +block: fix q->blkg_list corruption during disk rebind + +Multiple gendisk instances can allocated/added for single request queue +in case of disk rebind. blkg may still stay in q->blkg_list when calling +blkcg_init_disk() for rebind, then q->blkg_list becomes corrupted. + +Fix the list corruption issue by: + +- add blkg_init_queue() to initialize q->blkg_list & q->blkcg_mutex only +- move calling blkg_init_queue() into blk_alloc_queue() + +The list corruption should be started since commit f1c006f1c685 ("blk-cgroup: +synchronize pd_free_fn() from blkg_free_workfn() and blkcg_deactivate_policy()") +which delays removing blkg from q->blkg_list into blkg_free_workfn(). + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该漏洞与cgroup相关。描述中提到`blkcg_init_disk()`和`q->blkg_list`,这些都是块设备控制器组(blk-cgroup)相关的功能,而cgroup是Linux内核中的一个重要特性,用于资源限制和隔离,也是容器技术(如Docker)实现资源隔离的基础。 + +2. 
**程序漏洞分析**: + - **程序**:这是Linux内核(Kernel)的漏洞。 + - **漏洞发生原因**:在磁盘重新绑定(disk rebind)过程中,可能会为单个请求队列(request queue)分配/添加多个`gendisk`实例。如果在调用`blkcg_init_disk()`时,`blkg`仍然存在于`q->blkg_list`中,则会导致`q->blkg_list`被破坏。 + - **效果**:这种列表破坏问题可能导致内核崩溃或数据不一致,尤其是在涉及块设备控制器组(blk-cgroup)的操作中。由于cgroup是容器技术的重要组成部分,此漏洞可能会影响基于Linux内核的容器运行时(如Docker、Kubernetes等)的稳定性,进而影响容器的资源管理和隔离能力。 + +cve: ./data/2024/36xxx/CVE-2024-36000.json +In the Linux kernel, the following vulnerability has been resolved: + +mm/hugetlb: fix missing hugetlb_lock for resv uncharge + +There is a recent report on UFFDIO_COPY over hugetlb: + +https://lore.kernel.org/all/000000000000ee06de0616177560@google.com/ + +350: lockdep_assert_held(&hugetlb_lock); + +Should be an issue in hugetlb but triggered in an userfault context, where +it goes into the unlikely path where two threads modifying the resv map +together. Mike has a fix in that path for resv uncharge but it looks like +the locking criteria was overlooked: hugetlb_cgroup_uncharge_folio_rsvd() +will update the cgroup pointer, so it requires to be called with the lock +held. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 cgroup 相关。问题涉及 `hugetlb_cgroup_uncharge_folio_rsvd()` 函数,该函数会更新 cgroup 指针,并且需要在持有锁的情况下调用。 + +2. 
**程序漏洞分析**: + - **漏洞所在程序**:这是 Linux 内核 (Kernel) 的漏洞,具体位于内存管理子系统中的 hugetlb(huge TLB)模块。 + - **漏洞发生原因**:在处理 UFFDIO_COPY(用户态故障注入复制操作)时,当涉及到 hugetlb 的资源释放(resv uncharge),内核未正确持有 `hugetlb_lock` 锁。这可能导致竞争条件,尤其是在多线程环境中,两个线程同时修改 resv map 时出现问题。 + - **漏洞效果**:此漏洞可能会导致内存管理的不一致状态,例如 cgroup 指针更新错误,从而引发潜在的系统崩溃或资源泄漏。虽然该问题本身不一定直接导致安全漏洞(如权限提升或信息泄露),但它可能间接影响系统的稳定性和可靠性,尤其是在容器化环境中使用 cgroup 管理资源时。 + +总结:该 CVE 与 cgroup 相关,是 Linux 内核中 hugetlb 模块的一个竞争条件漏洞,可能导致资源管理不一致和系统不稳定。 + +cve: ./data/2024/36xxx/CVE-2024-36907.json +In the Linux kernel, the following vulnerability has been resolved: + +SUNRPC: add a missing rpc_stat for TCP TLS + +Commit 1548036ef120 ("nfs: make the rpc_stat per net namespace") added +functionality to specify rpc_stats function but missed adding it to the +TCP TLS functionality. As the result, mounting with xprtsec=tls lead to +the following kernel oops. + +[ 128.984192] Unable to handle kernel NULL pointer dereference at +virtual address 000000000000001c +[ 128.985058] Mem abort info: +[ 128.985372] ESR = 0x0000000096000004 +[ 128.985709] EC = 0x25: DABT (current EL), IL = 32 bits +[ 128.986176] SET = 0, FnV = 0 +[ 128.986521] EA = 0, S1PTW = 0 +[ 128.986804] FSC = 0x04: level 0 translation fault +[ 128.987229] Data abort info: +[ 128.987597] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 +[ 128.988169] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 +[ 128.988811] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 +[ 128.989302] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000106c84000 +[ 128.990048] [000000000000001c] pgd=0000000000000000, p4d=0000000000000000 +[ 128.990736] Internal error: Oops: 0000000096000004 [#1] SMP +[ 128.991168] Modules linked in: nfs_layout_nfsv41_files +rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace netfs +uinput dm_mod nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib +nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct +nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 rfkill +ip_set nf_tables nfnetlink qrtr 
vsock_loopback +vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vsock +sunrpc vfat fat uvcvideo videobuf2_vmalloc videobuf2_memops uvc +videobuf2_v4l2 videodev videobuf2_common mc vmw_vmci xfs libcrc32c +e1000e crct10dif_ce ghash_ce sha2_ce vmwgfx nvme sha256_arm64 +nvme_core sr_mod cdrom sha1_ce drm_ttm_helper ttm drm_kms_helper drm +sg fuse +[ 128.996466] CPU: 0 PID: 179 Comm: kworker/u4:26 Kdump: loaded Not +tainted 6.8.0-rc6+ #12 +[ 128.997226] Hardware name: VMware, Inc. VMware20,1/VBSA, BIOS +VMW201.00V.21805430.BA64.2305221830 05/22/2023 +[ 128.998084] Workqueue: xprtiod xs_tcp_tls_setup_socket [sunrpc] +[ 128.998701] pstate: 81400005 (Nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) +[ 128.999384] pc : call_start+0x74/0x138 [sunrpc] +[ 128.999809] lr : __rpc_execute+0xb8/0x3e0 [sunrpc] +[ 129.000244] sp : ffff8000832b3a00 +[ 129.000508] x29: ffff8000832b3a00 x28: ffff800081ac79c0 x27: ffff800081ac7000 +[ 129.001111] x26: 0000000004248060 x25: 0000000000000000 x24: ffff800081596008 +[ 129.001757] x23: ffff80007b087240 x22: ffff00009a509d30 x21: 0000000000000000 +[ 129.002345] x20: ffff000090075600 x19: ffff00009a509d00 x18: ffffffffffffffff +[ 129.002912] x17: 733d4d4554535953 x16: 42555300312d746e x15: ffff8000832b3a88 +[ 129.003464] x14: ffffffffffffffff x13: ffff8000832b3a7d x12: 0000000000000008 +[ 129.004021] x11: 0101010101010101 x10: ffff8000150cb560 x9 : ffff80007b087c00 +[ 129.004577] x8 : ffff00009a509de0 x7 : 0000000000000000 x6 : 00000000be8c4ee3 +[ 129.005026] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff000094d56680 +[ 129.005425] x2 : ffff80007b0637f8 x1 : ffff000090075600 x0 : ffff00009a509d00 +[ 129.005824] Call trace: +[ 129.005967] call_start+0x74/0x138 [sunrpc] +[ 129.006233] __rpc_execute+0xb8/0x3e0 [sunrpc] +[ 129.006506] rpc_execute+0x160/0x1d8 [sunrpc] +[ 129.006778] rpc_run_task+0x148/0x1f8 [sunrpc] +[ 129.007204] tls_probe+0x80/0xd0 [sunrpc] +[ 129.007460] rpc_ping+0x28/0x80 [sunrpc] +[ 129.007715] 
rpc_create_xprt+0x134/0x1a0 [sunrpc] +[ 129.007999] rpc_create+0x128/0x2a0 [sunrpc] +[ 129.008264] xs_tcp_tls_setup_socket+0xdc/0x508 [sunrpc] +[ 129.008583] process_one_work+0x174/0x3c8 +[ 129.008813] worker_thread+0x2c8/0x3e0 +[ 129.009033] kthread+0x100/0x110 +[ 129.009225] ret_from_fork+0x10/0x20 +[ 129.009432] Code: f0ffffc2 911fe042 aa1403e1 aa1303e0 (b9401c83) + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + N/A + +2. **这是什么程序的漏洞**: + 这是 Linux 内核 (Kernel) 的漏洞。具体来说,该漏洞发生在 SUNRPC(Sun Remote Procedure Call)模块中,涉及 TCP TLS 功能的实现。由于在引入 per-net namespace 的 rpc_stat 功能时遗漏了对 TCP TLS 的支持,导致在使用 `xprtsec=tls` 挂载选项时触发空指针解引用问题,进而引发内核崩溃 (kernel oops)。 + + **漏洞如何发生**: + 在挂载 NFS 文件系统并启用 `xprtsec=tls` 选项时,代码尝试访问未正确初始化的 rpc_stat 结构体,从而导致空指针解引用错误。这是因为相关的 rpc_stat 初始化逻辑没有被正确扩展到 TCP TLS 功能中。 + + **漏洞效果**: + 该漏洞会导致内核崩溃,使系统无法正常运行 SUNRPC 相关功能(例如通过 TLS 加密的 NFS 挂载)。这可能会中断依赖这些功能的服务或应用程序,但并不直接涉及权限提升或数据泄露等问题。 + +cve: ./data/2024/36xxx/CVE-2024-36939.json +In the Linux kernel, the following vulnerability has been resolved: + +nfs: Handle error of rpc_proc_register() in nfs_net_init(). + +syzkaller reported a warning [0] triggered while destroying immature +netns. + +rpc_proc_register() was called in init_nfs_fs(), but its error +has been ignored since at least the initial commit 1da177e4c3f4 +("Linux-2.6.12-rc2"). + +Recently, commit d47151b79e32 ("nfs: expose /proc/net/sunrpc/nfs +in net namespaces") converted the procfs to per-netns and made +the problem more visible. + +Even when rpc_proc_register() fails, nfs_net_init() could succeed, +and thus nfs_net_exit() will be called while destroying the netns. + +Then, remove_proc_entry() will be called for non-existing proc +directory and trigger the warning below. + +Let's handle the error of rpc_proc_register() properly in nfs_net_init(). 
+ +[0]: +name 'nfs' +WARNING: CPU: 1 PID: 1710 at fs/proc/generic.c:711 remove_proc_entry+0x1bb/0x2d0 fs/proc/generic.c:711 +Modules linked in: +CPU: 1 PID: 1710 Comm: syz-executor.2 Not tainted 6.8.0-12822-gcd51db110a7e #12 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +RIP: 0010:remove_proc_entry+0x1bb/0x2d0 fs/proc/generic.c:711 +Code: 41 5d 41 5e c3 e8 85 09 b5 ff 48 c7 c7 88 58 64 86 e8 09 0e 71 02 e8 74 09 b5 ff 4c 89 e6 48 c7 c7 de 1b 80 84 e8 c5 ad 97 ff <0f> 0b eb b1 e8 5c 09 b5 ff 48 c7 c7 88 58 64 86 e8 e0 0d 71 02 eb +RSP: 0018:ffffc9000c6d7ce0 EFLAGS: 00010286 +RAX: 0000000000000000 RBX: ffff8880422b8b00 RCX: ffffffff8110503c +RDX: ffff888030652f00 RSI: ffffffff81105045 RDI: 0000000000000001 +RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000001 R11: ffffffff81bb62cb R12: ffffffff84807ffc +R13: ffff88804ad6fcc0 R14: ffffffff84807ffc R15: ffffffff85741ff8 +FS: 00007f30cfba8640(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007ff51afe8000 CR3: 000000005a60a005 CR4: 0000000000770ef0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +PKRU: 55555554 +Call Trace: + + rpc_proc_unregister+0x64/0x70 net/sunrpc/stats.c:310 + nfs_net_exit+0x1c/0x30 fs/nfs/inode.c:2438 + ops_exit_list+0x62/0xb0 net/core/net_namespace.c:170 + setup_net+0x46c/0x660 net/core/net_namespace.c:372 + copy_net_ns+0x244/0x590 net/core/net_namespace.c:505 + create_new_namespaces+0x2ed/0x770 kernel/nsproxy.c:110 + unshare_nsproxy_namespaces+0xae/0x160 kernel/nsproxy.c:228 + ksys_unshare+0x342/0x760 kernel/fork.c:3322 + __do_sys_unshare kernel/fork.c:3393 [inline] + __se_sys_unshare kernel/fork.c:3391 [inline] + __x64_sys_unshare+0x1f/0x30 kernel/fork.c:3391 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x4f/0x110 
arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x46/0x4e +RIP: 0033:0x7f30d0febe5d +Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 +RSP: 002b:00007f30cfba7cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 +RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007f30d0febe5d +RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000006c020600 +RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 +R13: 000000000000000b R14: 00007f30d104c530 R15: 0000000000000000 + + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该漏洞与namespace相关。具体来说,它涉及网络命名空间(netns)的创建和销毁过程。在销毁未成熟的网络命名空间时触发了警告。 + +2. **这是什么程序的漏洞**: + 这是**Linux内核(Kernel)**的漏洞。漏洞发生在NFS(Network File System)子系统的实现中,特别是在处理`rpc_proc_register()`错误时。以下是详细分析: + + - **漏洞发生原因**: + 在`init_nfs_fs()`函数中调用了`rpc_proc_register()`,但其返回的错误未被正确处理。这可能导致即使`rpc_proc_register()`失败,`nfs_net_init()`仍成功返回,从而在后续销毁网络命名空间时调用`nfs_net_exit()`。此时,`remove_proc_entry()`会尝试移除一个不存在的proc目录,进而触发警告。 + + - **效果**: + 该漏洞会导致内核发出警告(WARNING),可能影响系统的稳定性。虽然不会直接导致系统崩溃,但在某些情况下可能会引发进一步的问题,例如数据丢失或服务中断。 + +3. **总结**: + 该CVE与网络命名空间(netns)相关,属于Linux内核的NFS子系统漏洞。问题源于对`rpc_proc_register()`错误处理不当,可能在销毁网络命名空间时触发内核警告。 + +cve: ./data/2024/38xxx/CVE-2024-38384.json +In the Linux kernel, the following vulnerability has been resolved: + +blk-cgroup: fix list corruption from reorder of WRITE ->lqueued + +__blkcg_rstat_flush() can be run anytime, especially when blk_cgroup_bio_start +is being executed. + +If WRITE of `->lqueued` is re-ordered with READ of 'bisc->lnode.next' in +the loop of __blkcg_rstat_flush(), `next_bisc` can be assigned with one +stat instance being added in blk_cgroup_bio_start(), then the local +list in __blkcg_rstat_flush() could be corrupted. + +Fix the issue by adding one barrier. + +analysis: 1. 
**是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,这个 CVE 与 cgroup(控制组)相关。具体来说,问题出现在 `blk-cgroup` 的实现中,这是 Linux 内核中用于管理块设备 I/O 资源限制的一部分。 + +2. **漏洞分析:** + - **程序类型:** 这是 Linux 内核的漏洞。 + - **漏洞发生原因:** 在 `__blkcg_rstat_flush()` 函数运行时,如果 `blk_cgroup_bio_start` 同时执行,可能会导致对 `->lqueued` 的写操作与对 `bisc->lnode.next` 的读操作发生重排序。这种重排序会导致 `next_bisc` 被赋值为一个在 `blk_cgroup_bio_start()` 中新添加的统计实例,从而破坏本地列表的完整性。 + - **效果:** 此漏洞可能导致 `blk-cgroup` 的内部数据结构被破坏,进而可能引发内核崩溃或不稳定行为。在容器环境中,这可能会影响基于 cgroup 的资源限制功能,例如块设备 I/O 配额和优先级控制,从而削弱容器之间的隔离性或导致性能异常。 + +cve: ./data/2024/38xxx/CVE-2024-38564.json +In the Linux kernel, the following vulnerability has been resolved: + +bpf: Add BPF_PROG_TYPE_CGROUP_SKB attach type enforcement in BPF_LINK_CREATE + +bpf_prog_attach uses attach_type_to_prog_type to enforce proper +attach type for BPF_PROG_TYPE_CGROUP_SKB. link_create uses +bpf_prog_get and relies on bpf_prog_attach_check_attach_type +to properly verify prog_type <> attach_type association. + +Add missing attach_type enforcement for the link_create case. +Otherwise, it's currently possible to attach cgroup_skb prog +types to other cgroup hooks. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是Linux内核的漏洞。漏洞发生在BPF(Berkeley Packet Filter)程序类型与连接类型之间的验证逻辑中,具体是在`BPF_LINK_CREATE`时缺少对`BPF_PROG_TYPE_CGROUP_SKB`类型的正确附加类型强制检查。由于这种检查缺失,可能导致`cgroup_skb`类型的BPF程序被错误地附加到其他cgroup钩子上。 + +效果:此漏洞可能破坏cgroup的预期行为和隔离性,允许恶意用户绕过容器或cgroup的限制,影响系统的安全性与稳定性。 + +cve: ./data/2024/38xxx/CVE-2024-38663.json +In the Linux kernel, the following vulnerability has been resolved: + +blk-cgroup: fix list corruption from resetting io stat + +Since commit 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()"), +each iostat instance is added to blkcg percpu list, so blkcg_reset_stats() +can't reset the stat instance by memset(), otherwise the llist may be +corrupted. + +Fix the issue by only resetting the counter part. + +analysis: 1. 该CVE信息与cgroup相关,因为提到的是`blk-cgroup`(块设备控制组)中的问题。 +2. 
这是Linux内核的漏洞。漏洞发生在`blk-cgroup`模块中,由于代码优化(commit 3b8cc6298724),每次I/O统计实例被添加到`blkcg` percpu列表中,而`blkcg_reset_stats()`函数尝试通过`memset()`重置统计实例时,可能会导致链表(llist)损坏。此漏洞的效果可能导致内核数据结构损坏,进而引发系统不稳定、崩溃或潜在的特权提升风险。 + +cve: ./data/2024/39xxx/CVE-2024-39301.json +In the Linux kernel, the following vulnerability has been resolved: + +net/9p: fix uninit-value in p9_client_rpc() + +Syzbot with the help of KMSAN reported the following error: + +BUG: KMSAN: uninit-value in trace_9p_client_res include/trace/events/9p.h:146 [inline] +BUG: KMSAN: uninit-value in p9_client_rpc+0x1314/0x1340 net/9p/client.c:754 + trace_9p_client_res include/trace/events/9p.h:146 [inline] + p9_client_rpc+0x1314/0x1340 net/9p/client.c:754 + p9_client_create+0x1551/0x1ff0 net/9p/client.c:1031 + v9fs_session_init+0x1b9/0x28e0 fs/9p/v9fs.c:410 + v9fs_mount+0xe2/0x12b0 fs/9p/vfs_super.c:122 + legacy_get_tree+0x114/0x290 fs/fs_context.c:662 + vfs_get_tree+0xa7/0x570 fs/super.c:1797 + do_new_mount+0x71f/0x15e0 fs/namespace.c:3352 + path_mount+0x742/0x1f20 fs/namespace.c:3679 + do_mount fs/namespace.c:3692 [inline] + __do_sys_mount fs/namespace.c:3898 [inline] + __se_sys_mount+0x725/0x810 fs/namespace.c:3875 + __x64_sys_mount+0xe4/0x150 fs/namespace.c:3875 + do_syscall_64+0xd5/0x1f0 + entry_SYSCALL_64_after_hwframe+0x6d/0x75 + +Uninit was created at: + __alloc_pages+0x9d6/0xe70 mm/page_alloc.c:4598 + __alloc_pages_node include/linux/gfp.h:238 [inline] + alloc_pages_node include/linux/gfp.h:261 [inline] + alloc_slab_page mm/slub.c:2175 [inline] + allocate_slab mm/slub.c:2338 [inline] + new_slab+0x2de/0x1400 mm/slub.c:2391 + ___slab_alloc+0x1184/0x33d0 mm/slub.c:3525 + __slab_alloc mm/slub.c:3610 [inline] + __slab_alloc_node mm/slub.c:3663 [inline] + slab_alloc_node mm/slub.c:3835 [inline] + kmem_cache_alloc+0x6d3/0xbe0 mm/slub.c:3852 + p9_tag_alloc net/9p/client.c:278 [inline] + p9_client_prepare_req+0x20a/0x1770 net/9p/client.c:641 + p9_client_rpc+0x27e/0x1340 net/9p/client.c:688 + p9_client_create+0x1551/0x1ff0 
net/9p/client.c:1031 + v9fs_session_init+0x1b9/0x28e0 fs/9p/v9fs.c:410 + v9fs_mount+0xe2/0x12b0 fs/9p/vfs_super.c:122 + legacy_get_tree+0x114/0x290 fs/fs_context.c:662 + vfs_get_tree+0xa7/0x570 fs/super.c:1797 + do_new_mount+0x71f/0x15e0 fs/namespace.c:3352 + path_mount+0x742/0x1f20 fs/namespace.c:3679 + do_mount fs/namespace.c:3692 [inline] + __do_sys_mount fs/namespace.c:3898 [inline] + __se_sys_mount+0x725/0x810 fs/namespace.c:3875 + __x64_sys_mount+0xe4/0x150 fs/namespace.c:3875 + do_syscall_64+0xd5/0x1f0 + entry_SYSCALL_64_after_hwframe+0x6d/0x75 + +If p9_check_errors() fails early in p9_client_rpc(), req->rc.tag +will not be properly initialized. However, trace_9p_client_res() +ends up trying to print it out anyway before p9_client_rpc() +finishes. + +Fix this issue by assigning default values to p9_fcall fields +such as 'tag' and (just in case KMSAN unearths something new) 'id' +during the tag allocation stage. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + N/A + +2. **程序漏洞分析:** + 这是 Linux 内核中的一个漏洞,具体发生在 9P 协议的实现部分(`net/9p/client.c`)。漏洞的原因是在 `p9_client_rpc()` 函数中,如果 `p9_check_errors()` 提前失败,`req->rc.tag` 不会被正确初始化,但后续代码(如 `trace_9p_client_res()`)仍然尝试访问并打印未初始化的值,导致潜在的未初始化值使用问题。此漏洞的效果是可能导致内核崩溃或信息泄露。 + +3. **无需额外信息,因与容器隔离无关。** + +cve: ./data/2024/39xxx/CVE-2024-39503.json +In the Linux kernel, the following vulnerability has been resolved: + +netfilter: ipset: Fix race between namespace cleanup and gc in the list:set type + +Lion Ackermann reported that there is a race condition between namespace cleanup +in ipset and the garbage collection of the list:set type. The namespace +cleanup can destroy the list:set type of sets while the gc of the set type is +waiting to run in rcu cleanup. The latter uses data from the destroyed set which +thus leads use after free. The patch contains the following parts: + +- When destroying all sets, first remove the garbage collectors, then wait + if needed and then destroy the sets. 
+- Fix the badly ordered "wait then remove gc" for the destroy a single set + case. +- Fix the missing rcu locking in the list:set type in the userspace test + case. +- Use proper RCU list handlings in the list:set type. + +The patch depends on c1193d9bbbd3 (netfilter: ipset: Add list flush to cancel_gc). + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 信息与 namespace 相关。具体来说,问题涉及在 `ipset` 的垃圾回收(garbage collection, gc)与命名空间清理(namespace cleanup)之间的竞争条件(race condition)。这种竞争可能导致在命名空间被销毁时发生 "use-after-free" 错误。 + +2. **漏洞所在的程序及影响分析:** + - **程序类型:** 这是 Linux 内核(Kernel)中的一个漏洞。 + - **漏洞位置:** 漏洞发生在 `netfilter: ipset` 模块中,具体是 `list:set` 类型的集合处理逻辑。 + - **漏洞原因:** 在命名空间清理过程中,可能会销毁 `list:set` 类型的集合,而此时垃圾回收线程仍在等待 RCU 清理完成。如果垃圾回收线程随后尝试访问已被销毁的数据结构,则会导致 "use-after-free" 错误。 + - **漏洞效果:** 此漏洞可能引发内核崩溃(kernel oops 或 panic),从而导致系统不可用。此外,攻击者可能利用此漏洞触发内存损坏,进而可能实现权限提升或拒绝服务攻击(DoS)。 + +总结:这是一个与 namespace 和内核隔离机制相关的漏洞,存在于 Linux 内核的 `netfilter: ipset` 模块中,可能导致系统崩溃或被恶意利用。 + +cve: ./data/2024/39xxx/CVE-2024-39690.json +Capsule is a multi-tenancy and policy-based framework for Kubernetes. In Capsule v0.7.0 and earlier, the tenant-owner can patch any arbitrary namespace that has not been taken over by a tenant (i.e., namespaces without the ownerReference field), thereby gaining control of that namespace. + +analysis: 1. 这个CVE信息与namespace相关,因为描述中提到tenant-owner可以修补任何未被租户接管的namespace(即没有ownerReference字段的namespace),从而获得对该namespace的控制。 + +2. 这是Capsule框架的漏洞,Capsule是一个基于Kubernetes的多租户和策略框架。漏洞发生的原因是Capsule在v0.7.0及更早版本中,允许tenant-owner对未被租户接管的namespace进行任意修补。这种行为导致tenant-owner可能控制不属于他们的namespace,破坏了多租户环境中的隔离性,可能导致未经授权的资源访问或操作。 + +此漏洞并非内核Kernel、容器实现Docker或容器内部运行的应用的问题,而是Kubernetes生态系统中的一个高级管理工具(Capsule)的逻辑漏洞。 + +cve: ./data/2024/3xxx/CVE-2024-3056.json +A flaw was found in Podman. 
This issue may allow an attacker to create a specially crafted container that, when configured to share the same IPC with at least one other container, can create a large number of IPC resources in /dev/shm. The malicious container will continue to exhaust resources until it is out-of-memory (OOM) killed. While the malicious container's cgroup will be removed, the IPC resources it created are not. Those resources are tied to the IPC namespace that will not be removed until all containers using it are stopped, and one non-malicious container is holding the namespace open. The malicious container is restarted, either automatically or by attacker control, repeating the process and increasing the amount of memory consumed. With a container configured to restart always, such as `podman run --restart=always`, this can result in a memory-based denial of service of the system. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,这个 CVE 信息与 namespace、cgroup 和容器隔离机制密切相关。具体涉及 IPC(Inter-Process Communication)共享和 cgroup 的资源管理问题。 + +2. 
**这是什么程序的漏洞:** + 这是 **Podman** 的漏洞。 + - 漏洞发生的原因:攻击者可以通过创建一个恶意容器,该容器配置为与至少另一个容器共享相同的 IPC 命名空间。然后,恶意容器会在 `/dev/shm` 中创建大量 IPC 资源(如共享内存段),导致系统内存被耗尽。 + - 漏洞的效果:当恶意容器因 OOM 被杀死后,其 cgroup 被移除,但由该容器创建的 IPC 资源仍然存在,因为这些资源绑定到 IPC 命名空间,而 IPC 命名空间只有在使用它的所有容器都被停止时才会释放。如果有一个非恶意容器仍然在使用该 IPC 命名空间,则命名空间不会被销毁。如果恶意容器配置为自动重启(例如通过 `--restart=always` 参数),它会重复上述过程,进一步消耗系统内存,最终可能导致基于内存的拒绝服务攻击(DoS)。 + +总结:这是一个与容器隔离相关的漏洞,主要影响 Podman 的 IPC 资源管理和 cgroup 清理机制。 + +cve: ./data/2024/40xxx/CVE-2024-40947.json +In the Linux kernel, the following vulnerability has been resolved: + +ima: Avoid blocking in RCU read-side critical section + +A panic happens in ima_match_policy: + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 +PGD 42f873067 P4D 0 +Oops: 0000 [#1] SMP NOPTI +CPU: 5 PID: 1286325 Comm: kubeletmonit.sh +Kdump: loaded Tainted: P +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), + BIOS 0.0.0 02/06/2015 +RIP: 0010:ima_match_policy+0x84/0x450 +Code: 49 89 fc 41 89 cf 31 ed 89 44 24 14 eb 1c 44 39 + 7b 18 74 26 41 83 ff 05 74 20 48 8b 1b 48 3b 1d + f2 b9 f4 00 0f 84 9c 01 00 00 <44> 85 73 10 74 ea + 44 8b 6b 14 41 f6 c5 01 75 d4 41 f6 c5 02 74 0f +RSP: 0018:ff71570009e07a80 EFLAGS: 00010207 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000200 +RDX: ffffffffad8dc7c0 RSI: 0000000024924925 RDI: ff3e27850dea2000 +RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffffabfce739 +R10: ff3e27810cc42400 R11: 0000000000000000 R12: ff3e2781825ef970 +R13: 00000000ff3e2785 R14: 000000000000000c R15: 0000000000000001 +FS: 00007f5195b51740(0000) +GS:ff3e278b12d40000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000010 CR3: 0000000626d24002 CR4: 0000000000361ee0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + ima_get_action+0x22/0x30 + process_measurement+0xb0/0x830 + ? page_add_file_rmap+0x15/0x170 + ? 
alloc_set_pte+0x269/0x4c0 + ? prep_new_page+0x81/0x140 + ? simple_xattr_get+0x75/0xa0 + ? selinux_file_open+0x9d/0xf0 + ima_file_check+0x64/0x90 + path_openat+0x571/0x1720 + do_filp_open+0x9b/0x110 + ? page_counter_try_charge+0x57/0xc0 + ? files_cgroup_alloc_fd+0x38/0x60 + ? __alloc_fd+0xd4/0x250 + ? do_sys_open+0x1bd/0x250 + do_sys_open+0x1bd/0x250 + do_syscall_64+0x5d/0x1d0 + entry_SYSCALL_64_after_hwframe+0x65/0xca + +Commit c7423dbdbc9e ("ima: Handle -ESTALE returned by +ima_filter_rule_match()") introduced call to ima_lsm_copy_rule within a +RCU read-side critical section which contains kmalloc with GFP_KERNEL. +This implies a possible sleep and violates limitations of RCU read-side +critical sections on non-PREEMPT systems. + +Sleeping within RCU read-side critical section might cause +synchronize_rcu() returning early and break RCU protection, allowing a +UAF to happen. + +The root cause of this issue could be described as follows: +| Thread A | Thread B | +| |ima_match_policy | +| | rcu_read_lock | +|ima_lsm_update_rule | | +| synchronize_rcu | | +| | kmalloc(GFP_KERNEL)| +| | sleep | +==> synchronize_rcu returns early +| kfree(entry) | | +| | entry = entry->next| +==> UAF happens and entry now becomes NULL (or could be anything). +| | entry->action | +==> Accessing entry might cause panic. + +To fix this issue, we are converting all kmalloc that is called within +RCU read-side critical section to use GFP_ATOMIC. + +[PM: fixed missing comment, long lines, !CONFIG_IMA_LSM_RULES case] + +analysis: 1. **是否与 namespace、cgroup、container 或容器隔离相关:** + N/A + +2. **程序漏洞分析:** + - **漏洞所属程序:** Linux 内核 (Kernel) + - **漏洞发生原因:** + 在 `ima_match_policy` 函数中,由于 `ima_lsm_copy_rule` 被调用时处于 RCU(Read-Copy-Update)读侧关键区段内,而该函数内部使用了 `kmalloc(GFP_KERNEL)`,这可能导致睡眠操作。RCU 读侧关键区段不允许睡眠,否则会违反 RCU 的规则,导致 `synchronize_rcu()` 提前返回,并破坏 RCU 保护机制,从而引发 UAF(Use-After-Free)问题。 + - **漏洞效果:** + 此漏洞可能导致内核崩溃(Kernel Panic),具体表现为内核尝试访问已释放的内存地址(NULL 指针解引用)。这种问题在多线程环境下尤其危险,可能被恶意攻击者利用来触发系统不稳定或执行任意代码。 + +3. 
**结论:** + 该 CVE 与容器、namespace、cgroup 或隔离机制无关。 + +cve: ./data/2024/40xxx/CVE-2024-40949.json +In the Linux kernel, the following vulnerability has been resolved: + +mm: shmem: fix getting incorrect lruvec when replacing a shmem folio + +When testing shmem swapin, I encountered the warning below on my machine. +The reason is that replacing an old shmem folio with a new one causes +mem_cgroup_migrate() to clear the old folio's memcg data. As a result, +the old folio cannot get the correct memcg's lruvec needed to remove +itself from the LRU list when it is being freed. This could lead to +possible serious problems, such as LRU list crashes due to holding the +wrong LRU lock, and incorrect LRU statistics. + +To fix this issue, we can fallback to use the mem_cgroup_replace_folio() +to replace the old shmem folio. + +[ 5241.100311] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x5d9960 +[ 5241.100317] head: order:4 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 +[ 5241.100319] flags: 0x17fffe0000040068(uptodate|lru|head|swapbacked|node=0|zone=2|lastcpupid=0x3ffff) +[ 5241.100323] raw: 17fffe0000040068 fffffdffd6687948 fffffdffd69ae008 0000000000000000 +[ 5241.100325] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 +[ 5241.100326] head: 17fffe0000040068 fffffdffd6687948 fffffdffd69ae008 0000000000000000 +[ 5241.100327] head: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 +[ 5241.100328] head: 17fffe0000000204 fffffdffd6665801 ffffffffffffffff 0000000000000000 +[ 5241.100329] head: 0000000a00000010 0000000000000000 00000000ffffffff 0000000000000000 +[ 5241.100330] page dumped because: VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled()) +[ 5241.100338] ------------[ cut here ]------------ +[ 5241.100339] WARNING: CPU: 19 PID: 78402 at include/linux/memcontrol.h:775 folio_lruvec_lock_irqsave+0x140/0x150 +[...] 
+[ 5241.100374] pc : folio_lruvec_lock_irqsave+0x140/0x150 +[ 5241.100375] lr : folio_lruvec_lock_irqsave+0x138/0x150 +[ 5241.100376] sp : ffff80008b38b930 +[...] +[ 5241.100398] Call trace: +[ 5241.100399] folio_lruvec_lock_irqsave+0x140/0x150 +[ 5241.100401] __page_cache_release+0x90/0x300 +[ 5241.100404] __folio_put+0x50/0x108 +[ 5241.100406] shmem_replace_folio+0x1b4/0x240 +[ 5241.100409] shmem_swapin_folio+0x314/0x528 +[ 5241.100411] shmem_get_folio_gfp+0x3b4/0x930 +[ 5241.100412] shmem_fault+0x74/0x160 +[ 5241.100414] __do_fault+0x40/0x218 +[ 5241.100417] do_shared_fault+0x34/0x1b0 +[ 5241.100419] do_fault+0x40/0x168 +[ 5241.100420] handle_pte_fault+0x80/0x228 +[ 5241.100422] __handle_mm_fault+0x1c4/0x440 +[ 5241.100424] handle_mm_fault+0x60/0x1f0 +[ 5241.100426] do_page_fault+0x120/0x488 +[ 5241.100429] do_translation_fault+0x4c/0x68 +[ 5241.100431] do_mem_abort+0x48/0xa0 +[ 5241.100434] el0_da+0x38/0xc0 +[ 5241.100436] el0t_64_sync_handler+0x68/0xc0 +[ 5241.100437] el0t_64_sync+0x14c/0x150 +[ 5241.100439] ---[ end trace 0000000000000000 ]--- + +[baolin.wang@linux.alibaba.com: remove less helpful comments, per Matthew] + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,此漏洞与cgroup相关。问题涉及到`mem_cgroup_migrate()`函数和内存控制组(memory cgroup)的数据管理,这直接影响到cgroup的功能,而cgroup是容器实现资源隔离的核心技术之一。 + +2. 
**程序漏洞分析**: + - **程序**:这是Linux内核(Kernel)中的一个漏洞。 + - **漏洞发生原因**:在替换一个旧的shmem folio时,`mem_cgroup_migrate()`会清除旧folio的memcg数据。当旧folio被释放时,由于缺少正确的memcg信息,无法正确获取LRU列表所需的lruvec,从而可能导致LRU列表崩溃或统计错误。 + - **效果**:此漏洞可能会导致严重的系统问题,例如LRU列表崩溃(持有错误的LRU锁)或不正确的LRU统计信息,进而影响内存管理的稳定性,尤其是在使用cgroup限制内存使用的场景下。 + +总结:此CVE描述了一个与cgroup相关的Linux内核内存管理漏洞,可能导致内存管理功能异常,影响依赖cgroup实现资源隔离的容器环境。 + +cve: ./data/2024/41xxx/CVE-2024-41000.json +In the Linux kernel, the following vulnerability has been resolved: + +block/ioctl: prefer different overflow check + +Running syzkaller with the newly reintroduced signed integer overflow +sanitizer shows this report: + +[ 62.982337] ------------[ cut here ]------------ +[ 62.985692] cgroup: Invalid name +[ 62.986211] UBSAN: signed-integer-overflow in ../block/ioctl.c:36:46 +[ 62.989370] 9pnet_fd: p9_fd_create_tcp (7343): problem connecting socket to 127.0.0.1 +[ 62.992992] 9223372036854775807 + 4095 cannot be represented in type 'long long' +[ 62.997827] 9pnet_fd: p9_fd_create_tcp (7345): problem connecting socket to 127.0.0.1 +[ 62.999369] random: crng reseeded on system resumption +[ 63.000634] GUP no longer grows the stack in syz-executor.2 (7353): 20002000-20003000 (20001000) +[ 63.000668] CPU: 0 PID: 7353 Comm: syz-executor.2 Not tainted 6.8.0-rc2-00035-gb3ef86b5a957 #1 +[ 63.000677] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 +[ 63.000682] Call Trace: +[ 63.000686] +[ 63.000731] dump_stack_lvl+0x93/0xd0 +[ 63.000919] __get_user_pages+0x903/0xd30 +[ 63.001030] __gup_longterm_locked+0x153e/0x1ba0 +[ 63.001041] ? _raw_read_unlock_irqrestore+0x17/0x50 +[ 63.001072] ? 
try_get_folio+0x29c/0x2d0 +[ 63.001083] internal_get_user_pages_fast+0x1119/0x1530 +[ 63.001109] iov_iter_extract_pages+0x23b/0x580 +[ 63.001206] bio_iov_iter_get_pages+0x4de/0x1220 +[ 63.001235] iomap_dio_bio_iter+0x9b6/0x1410 +[ 63.001297] __iomap_dio_rw+0xab4/0x1810 +[ 63.001316] iomap_dio_rw+0x45/0xa0 +[ 63.001328] ext4_file_write_iter+0xdde/0x1390 +[ 63.001372] vfs_write+0x599/0xbd0 +[ 63.001394] ksys_write+0xc8/0x190 +[ 63.001403] do_syscall_64+0xd4/0x1b0 +[ 63.001421] ? arch_exit_to_user_mode_prepare+0x3a/0x60 +[ 63.001479] entry_SYSCALL_64_after_hwframe+0x6f/0x77 +[ 63.001535] RIP: 0033:0x7f7fd3ebf539 +[ 63.001551] Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 +[ 63.001562] RSP: 002b:00007f7fd32570c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +[ 63.001584] RAX: ffffffffffffffda RBX: 00007f7fd3ff3f80 RCX: 00007f7fd3ebf539 +[ 63.001590] RDX: 4db6d1e4f7e43360 RSI: 0000000020000000 RDI: 0000000000000004 +[ 63.001595] RBP: 00007f7fd3f1e496 R08: 0000000000000000 R09: 0000000000000000 +[ 63.001599] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +[ 63.001604] R13: 0000000000000006 R14: 00007f7fd3ff3f80 R15: 00007ffd415ad2b8 +... +[ 63.018142] ---[ end trace ]--- + +Historically, the signed integer overflow sanitizer did not work in the +kernel due to its interaction with `-fwrapv` but this has since been +changed [1] in the newest version of Clang; It was re-enabled in the +kernel with Commit 557f8c582a9ba8ab ("ubsan: Reintroduce signed overflow +sanitizer"). + +Let's rework this overflow checking logic to not actually perform an +overflow during the check itself, thus avoiding the UBSAN splat. + +[1]: https://github.com/llvm/llvm-project/pull/82432 + +analysis: 1. 
**是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个 CVE 与 cgroup 相关。日志中明确提到 `cgroup: Invalid name`,表明问题发生在控制组(cgroup)相关的代码路径中。 + +2. **这是什么程序的漏洞**: + 这是 Linux 内核(Kernel)的漏洞。具体来说,问题出在块设备 ioctl 的实现中 (`block/ioctl.c`),由于整数溢出检查逻辑不当,导致了 UBSAN(Undefined Behavior Sanitizer)检测到 signed integer overflow。 + **漏洞发生原因**: + 在处理某些特定输入时(例如通过 syzkaller 测试工具生成的 fuzz 输入),内核中的整数溢出检查逻辑本身可能会触发溢出,从而引发 UBSAN 报告。这表明代码在验证输入时没有正确避免潜在的溢出行为。 + **效果**: + 虽然该漏洞不会直接导致系统崩溃或权限提升,但它可能暴露出内核代码中的潜在缺陷,使得攻击者有机会利用类似的问题来触发其他更严重的漏洞(如内存损坏或拒绝服务)。此外,cgroup 是容器隔离机制的重要组成部分,因此任何与 cgroup 相关的漏洞都可能影响容器环境的安全性。 + +cve: ./data/2024/41xxx/CVE-2024-41010.json +In the Linux kernel, the following vulnerability has been resolved: + +bpf: Fix too early release of tcx_entry + +Pedro Pinto and later independently also Hyunwoo Kim and Wongi Lee reported +an issue that the tcx_entry can be released too early leading to a use +after free (UAF) when an active old-style ingress or clsact qdisc with a +shared tc block is later replaced by another ingress or clsact instance. + +Essentially, the sequence to trigger the UAF (one example) can be as follows: + + 1. A network namespace is created + 2. An ingress qdisc is created. This allocates a tcx_entry, and + &tcx_entry->miniq is stored in the qdisc's miniqp->p_miniq. At the + same time, a tcf block with index 1 is created. + 3. chain0 is attached to the tcf block. chain0 must be connected to + the block linked to the ingress qdisc to later reach the function + tcf_chain0_head_change_cb_del() which triggers the UAF. + 4. Create and graft a clsact qdisc. This causes the ingress qdisc + created in step 1 to be removed, thus freeing the previously linked + tcx_entry: + + rtnetlink_rcv_msg() + => tc_modify_qdisc() + => qdisc_create() + => clsact_init() [a] + => qdisc_graft() + => qdisc_destroy() + => __qdisc_destroy() + => ingress_destroy() [b] + => tcx_entry_free() + => kfree_rcu() // tcx_entry freed + + 5. Finally, the network namespace is closed. 
This registers the + cleanup_net worker, and during the process of releasing the + remaining clsact qdisc, it accesses the tcx_entry that was + already freed in step 4, causing the UAF to occur: + + cleanup_net() + => ops_exit_list() + => default_device_exit_batch() + => unregister_netdevice_many() + => unregister_netdevice_many_notify() + => dev_shutdown() + => qdisc_put() + => clsact_destroy() [c] + => tcf_block_put_ext() + => tcf_chain0_head_change_cb_del() + => tcf_chain_head_change_item() + => clsact_chain_head_change() + => mini_qdisc_pair_swap() // UAF + +There are also other variants, the gist is to add an ingress (or clsact) +qdisc with a specific shared block, then to replace that qdisc, waiting +for the tcx_entry kfree_rcu() to be executed and subsequently accessing +the current active qdisc's miniq one way or another. + +The correct fix is to turn the miniq_active boolean into a counter. What +can be observed, at step 2 above, the counter transitions from 0->1, at +step [a] from 1->2 (in order for the miniq object to remain active during +the replacement), then in [b] from 2->1 and finally [c] 1->0 with the +eventual release. The reference counter in general ranges from [0,2] and +it does not need to be atomic since all access to the counter is protected +by the rtnl mutex. With this in place, there is no longer a UAF happening +and the tcx_entry is freed at the correct time. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该漏洞与namespace相关。描述中明确提到创建了一个网络命名空间(network namespace),并且在关闭网络命名空间时触发了资源释放问题。 + +2. 
**这是什么程序的漏洞**: + - 这是Linux内核(Kernel)的漏洞。 + - 漏洞发生在BPF(Berkeley Packet Filter)子系统中,具体涉及tcx_entry的过早释放问题。 + - 漏洞的发生是由于在网络命名空间中创建和替换qdisc(排队规则)时,未正确管理tcx_entry的生命周期,导致Use-After-Free(UAF)问题。 + - 效果:攻击者可能利用此漏洞导致内核崩溃或执行任意代码,从而破坏系统的稳定性或安全性。 + +总结:这是一个Linux内核中的BPF子系统漏洞,与网络命名空间的使用密切相关,可能导致Use-After-Free问题,进而引发系统不稳定或被提权攻击。 + +cve: ./data/2024/41xxx/CVE-2024-41110.json +Moby is an open-source project created by Docker for software containerization. A security vulnerability has been detected in certain versions of Docker Engine, which could allow an attacker to bypass authorization plugins (AuthZ) under specific circumstances. The base likelihood of this being exploited is low. + +Using a specially-crafted API request, an Engine API client could make the daemon forward the request or response to an authorization plugin without the body. In certain circumstances, the authorization plugin may allow a request which it would have otherwise denied if the body had been forwarded to it. + +A security issue was discovered In 2018, where an attacker could bypass AuthZ plugins using a specially crafted API request. This could lead to unauthorized actions, including privilege escalation. Although this issue was fixed in Docker Engine v18.09.1 in January 2019, the fix was not carried forward to later major versions, resulting in a regression. Anyone who depends on authorization plugins that introspect the request and/or response body to make access control decisions is potentially impacted. + +Docker EE v19.03.x and all versions of Mirantis Container Runtime are not vulnerable. + +docker-ce v27.1.1 containes patches to fix the vulnerability. Patches have also been merged into the master, 19.03, 20.0, 23.0, 24.0, 25.0, 26.0, and 26.1 release branches. If one is unable to upgrade immediately, avoid using AuthZ plugins and/or restrict access to the Docker API to trusted parties, following the principle of least privilege. + +analysis: 1. 
**是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与容器相关。它涉及 Docker Engine 的授权插件(AuthZ)机制,而 Docker Engine 是一个用于管理容器的平台。虽然此漏洞并不直接涉及 namespace 或 cgroup 的实现,但它影响了容器环境中的访问控制和安全隔离。 + +2. **这是什么程序的漏洞,如何发生,有何效果:** + - **程序:** Docker Engine(具体为 Moby 项目中的 Docker Engine 实现)。 + - **漏洞原因:** 漏洞发生在 Docker Engine 处理 API 请求时,未能正确将请求或响应的主体(body)转发给授权插件(AuthZ)。这使得攻击者可以通过构造特定的 API 请求绕过授权检查。 + - **效果:** 攻击者可能利用此漏洞执行未经授权的操作,包括潜在的权限提升(privilege escalation)。这会破坏容器环境中的访问控制和安全隔离,允许未授权的用户或进程对容器资源进行操作。 + +**总结:** 该 CVE 与容器相关,影响 Docker Engine 的授权插件机制,可能导致访问控制被绕过,从而破坏容器环境的安全隔离。 + +cve: ./data/2024/41xxx/CVE-2024-41932.json +In the Linux kernel, the following vulnerability has been resolved: + +sched: fix warning in sched_setaffinity + +Commit 8f9ea86fdf99b added some logic to sched_setaffinity that included +a WARN when a per-task affinity assignment races with a cpuset update. + +Specifically, we can have a race where a cpuset update results in the +task affinity no longer being a subset of the cpuset. That's fine; we +have a fallback to instead use the cpuset mask. However, we have a WARN +set up that will trigger if the cpuset mask has no overlap at all with +the requested task affinity. This shouldn't be a warning condition; its +trivial to create this condition. + +Reproduced the warning by the following setup: + +- $PID inside a cpuset cgroup +- another thread repeatedly switching the cpuset cpus from 1-2 to just 1 +- another thread repeatedly setting the $PID affinity (via taskset) to 2 + +analysis: 1. **分析这个CVE信息是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个CVE与cgroup和隔离相关。问题涉及`cpuset` cgroup子系统,该子系统用于限制任务可以使用的CPU和内存节点。此外,`sched_setaffinity`函数的操作可能会影响容器内的任务调度行为。 + +2. 
**这是什么程序的漏洞**: + 这是Linux内核(Kernel)的漏洞。漏洞发生在`sched_setaffinity`函数中,当任务的CPU亲和性设置与`cpuset` cgroup更新发生竞争时,可能会触发不必要的警告(WARN)。 + + **漏洞如何发生**: + 当一个线程尝试为某个任务设置CPU亲和性(通过`taskset`或其他方式),而与此同时另一个线程正在更新该任务所属的`cpuset` cgroup配置时,可能发生竞争条件。如果`cpuset`的CPU掩码与请求的任务亲和性完全没有重叠,内核会触发一个警告(WARN)。然而,这种情况下触发警告并不合理,因为系统会自动回退到使用`cpuset`的掩码。 + + **漏洞效果**: + 该漏洞的主要影响是可能导致内核日志中出现不必要的警告信息(WARN),这可能会引起误报或混淆。虽然这个问题本身不会直接导致安全风险(如权限提升或数据泄露),但它可能干扰系统管理员对实际问题的判断。在容器化环境中,如果大量容器频繁触发此警告,可能会增加运维复杂性。 + +cve: ./data/2024/41xxx/CVE-2024-41968.json +A low privileged remote attacker may modify the docker settings setup of the device, leading to a limited DoS. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是容器实现Docker的漏洞。低权限的远程攻击者可以修改设备上的Docker设置,从而导致有限的拒绝服务(DoS)。此漏洞的发生可能是由于Docker配置文件或API未受到充分保护,使得攻击者能够篡改其设置,进而影响容器的正常运行或主机资源的可用性。 + +cve: ./data/2024/42xxx/CVE-2024-42104.json +In the Linux kernel, the following vulnerability has been resolved: + +nilfs2: add missing check for inode numbers on directory entries + +Syzbot reported that mounting and unmounting a specific pattern of +corrupted nilfs2 filesystem images causes a use-after-free of metadata +file inodes, which triggers a kernel bug in lru_add_fn(). + +As Jan Kara pointed out, this is because the link count of a metadata file +gets corrupted to 0, and nilfs_evict_inode(), which is called from iput(), +tries to delete that inode (ifile inode in this case). + +The inconsistency occurs because directories containing the inode numbers +of these metadata files that should not be visible in the namespace are +read without checking. + +Fix this issue by treating the inode numbers of these internal files as +errors in the sanity check helper when reading directory folios/pages. + +Also thanks to Hillf Danton and Matthew Wilcox for their initial mm-layer +analysis. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + N/A + +2. 
**漏洞分析:** + - **程序类型:** Linux内核 (Kernel) + - **漏洞描述:** 该漏洞存在于Linux内核的NILFS2文件系统实现中。由于在处理目录条目时缺少对inode号码的有效性检查,导致元数据文件的引用计数被破坏为0。当卸载特定模式的损坏NILFS2文件系统映像时,会触发元数据文件inode的use-after-free问题,最终在`lru_add_fn()`函数中引发内核错误。 + - **漏洞发生原因:** 在读取目录页/folio时,未对不应在命名空间中可见的元数据文件inode号码进行检查,从而导致了不一致性和潜在的内核崩溃。 + - **效果:** 攻击者可能通过挂载和卸载特定的损坏NILFS2文件系统映像,利用此漏洞导致内核崩溃或可能的权限提升。 + +3. **总结:** 该CVE与namespace、cgroup、container或容器隔离无关。 + +cve: ./data/2024/42xxx/CVE-2024-42105.json +In the Linux kernel, the following vulnerability has been resolved: + +nilfs2: fix inode number range checks + +Patch series "nilfs2: fix potential issues related to reserved inodes". + +This series fixes one use-after-free issue reported by syzbot, caused by +nilfs2's internal inode being exposed in the namespace on a corrupted +filesystem, and a couple of flaws that cause problems if the starting +number of non-reserved inodes written in the on-disk super block is +intentionally (or corruptly) changed from its default value. + + +This patch (of 3): + +In the current implementation of nilfs2, "nilfs->ns_first_ino", which +gives the first non-reserved inode number, is read from the superblock, +but its lower limit is not checked. + +As a result, if a number that overlaps with the inode number range of +reserved inodes such as the root directory or metadata files is set in the +super block parameter, the inode number test macros (NILFS_MDT_INODE and +NILFS_VALID_INODE) will not function properly. + +In addition, these test macros use left bit-shift calculations using with +the inode number as the shift count via the BIT macro, but the result of a +shift calculation that exceeds the bit width of an integer is undefined in +the C specification, so if "ns_first_ino" is set to a large value other +than the default value NILFS_USER_INO (=11), the macros may potentially +malfunction depending on the environment. 
+ +Fix these issues by checking the lower bound of "nilfs->ns_first_ino" and +by preventing bit shifts equal to or greater than the NILFS_USER_INO +constant in the inode number test macros. + +Also, change the type of "ns_first_ino" from signed integer to unsigned +integer to avoid the need for type casting in comparisons such as the +lower bound check introduced this time. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器隔离相关** + - 该 CVE 描述中提到的问题是关于 `nilfs2` 文件系统在处理 inode 编号范围时的潜在问题。虽然提到了 "namespace",但这里的 "namespace" 是指文件系统的内部命名空间(如 inode 的逻辑分组),而不是 Linux 容器中用于隔离的 namespace(如 PID、network、mount 等)。因此,此漏洞与 namespace、cgroup、container 或者容器隔离无关。 + +2. **程序漏洞分析** + - **这是什么程序的漏洞**:这是一个 Linux 内核 (Kernel) 的漏洞,具体涉及 `nilfs2` 文件系统的实现。 + - **漏洞如何发生**:`nilfs2` 文件系统从超级块中读取了第一个非保留 inode 编号 (`ns_first_ino`),但未对其进行下限检查。如果超级块中的值被篡改为与保留 inode 编号范围重叠的值,或者设置为一个非常大的值,则可能导致以下问题: + - inode 编号测试宏(如 `NILFS_MDT_INODE` 和 `NILFS_VALID_INODE`)无法正常工作。 + - 在某些环境中,由于 C 语言规范中对位移操作的限制,可能会导致未定义行为。 + - **漏洞效果**:攻击者可以通过构造一个损坏的文件系统超级块,触发此漏洞,可能导致文件系统数据损坏、系统崩溃或不可预期的行为。 + +**结论**:N/A + +cve: ./data/2024/42xxx/CVE-2024-42311.json +In the Linux kernel, the following vulnerability has been resolved: + +hfs: fix to initialize fields of hfs_inode_info after hfs_alloc_inode() + +Syzbot reports uninitialized value access issue as below: + +loop0: detected capacity change from 0 to 64 +===================================================== +BUG: KMSAN: uninit-value in hfs_revalidate_dentry+0x307/0x3f0 fs/hfs/sysdep.c:30 + hfs_revalidate_dentry+0x307/0x3f0 fs/hfs/sysdep.c:30 + d_revalidate fs/namei.c:862 [inline] + lookup_fast+0x89e/0x8e0 fs/namei.c:1649 + walk_component fs/namei.c:2001 [inline] + link_path_walk+0x817/0x1480 fs/namei.c:2332 + path_lookupat+0xd9/0x6f0 fs/namei.c:2485 + filename_lookup+0x22e/0x740 fs/namei.c:2515 + user_path_at_empty+0x8b/0x390 fs/namei.c:2924 + user_path_at include/linux/namei.h:57 [inline] + do_mount fs/namespace.c:3689 [inline] + __do_sys_mount fs/namespace.c:3898 [inline] + 
__se_sys_mount+0x66b/0x810 fs/namespace.c:3875 + __x64_sys_mount+0xe4/0x140 fs/namespace.c:3875 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x63/0x6b + +BUG: KMSAN: uninit-value in hfs_ext_read_extent fs/hfs/extent.c:196 [inline] +BUG: KMSAN: uninit-value in hfs_get_block+0x92d/0x1620 fs/hfs/extent.c:366 + hfs_ext_read_extent fs/hfs/extent.c:196 [inline] + hfs_get_block+0x92d/0x1620 fs/hfs/extent.c:366 + block_read_full_folio+0x4ff/0x11b0 fs/buffer.c:2271 + hfs_read_folio+0x55/0x60 fs/hfs/inode.c:39 + filemap_read_folio+0x148/0x4f0 mm/filemap.c:2426 + do_read_cache_folio+0x7c8/0xd90 mm/filemap.c:3553 + do_read_cache_page mm/filemap.c:3595 [inline] + read_cache_page+0xfb/0x2f0 mm/filemap.c:3604 + read_mapping_page include/linux/pagemap.h:755 [inline] + hfs_btree_open+0x928/0x1ae0 fs/hfs/btree.c:78 + hfs_mdb_get+0x260c/0x3000 fs/hfs/mdb.c:204 + hfs_fill_super+0x1fb1/0x2790 fs/hfs/super.c:406 + mount_bdev+0x628/0x920 fs/super.c:1359 + hfs_mount+0xcd/0xe0 fs/hfs/super.c:456 + legacy_get_tree+0x167/0x2e0 fs/fs_context.c:610 + vfs_get_tree+0xdc/0x5d0 fs/super.c:1489 + do_new_mount+0x7a9/0x16f0 fs/namespace.c:3145 + path_mount+0xf98/0x26a0 fs/namespace.c:3475 + do_mount fs/namespace.c:3488 [inline] + __do_sys_mount fs/namespace.c:3697 [inline] + __se_sys_mount+0x919/0x9e0 fs/namespace.c:3674 + __ia32_sys_mount+0x15b/0x1b0 fs/namespace.c:3674 + do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] + __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178 + do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203 + do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246 + entry_SYSENTER_compat_after_hwframe+0x70/0x82 + +Uninit was created at: + __alloc_pages+0x9a6/0xe00 mm/page_alloc.c:4590 + __alloc_pages_node include/linux/gfp.h:238 [inline] + alloc_pages_node include/linux/gfp.h:261 [inline] + alloc_slab_page mm/slub.c:2190 [inline] + allocate_slab mm/slub.c:2354 
[inline] + new_slab+0x2d7/0x1400 mm/slub.c:2407 + ___slab_alloc+0x16b5/0x3970 mm/slub.c:3540 + __slab_alloc mm/slub.c:3625 [inline] + __slab_alloc_node mm/slub.c:3678 [inline] + slab_alloc_node mm/slub.c:3850 [inline] + kmem_cache_alloc_lru+0x64d/0xb30 mm/slub.c:3879 + alloc_inode_sb include/linux/fs.h:3018 [inline] + hfs_alloc_inode+0x5a/0xc0 fs/hfs/super.c:165 + alloc_inode+0x83/0x440 fs/inode.c:260 + new_inode_pseudo fs/inode.c:1005 [inline] + new_inode+0x38/0x4f0 fs/inode.c:1031 + hfs_new_inode+0x61/0x1010 fs/hfs/inode.c:186 + hfs_mkdir+0x54/0x250 fs/hfs/dir.c:228 + vfs_mkdir+0x49a/0x700 fs/namei.c:4126 + do_mkdirat+0x529/0x810 fs/namei.c:4149 + __do_sys_mkdirat fs/namei.c:4164 [inline] + __se_sys_mkdirat fs/namei.c:4162 [inline] + __x64_sys_mkdirat+0xc8/0x120 fs/namei.c:4162 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x63/0x6b + +It missed to initialize .tz_secondswest, .cached_start and .cached_blocks +fields in struct hfs_inode_info after hfs_alloc_inode(), fix it. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关** + N/A + +2. **这是什么程序的漏洞?如何发生?有何效果?** + - **程序**: Linux 内核 (Kernel) + - **漏洞发生原因**: 在 HFS 文件系统中,`hfs_alloc_inode()` 函数分配了 inode 信息结构体 `hfs_inode_info`,但未正确初始化其中的部分字段(如 `.tz_secondswest`、`.cached_start` 和 `.cached_blocks`)。这导致在后续使用这些未初始化字段时,可能会触发未定义行为,例如访问未初始化值(uninitialized value access)。此问题由 KMSAN(Kernel Memory Sanitizer)检测到。 + - **漏洞效果**: + - 可能会导致内核崩溃(kernel panic)或不稳定行为。 + - 攻击者可能利用此漏洞造成拒绝服务(DoS),或者在某些情况下进一步探索以实现权限提升。 + +3. **总结**: 此 CVE 是 Linux 内核中 HFS 文件系统的漏洞,与 namespace、cgroup、container 或容器隔离无关。 + +cve: ./data/2024/42xxx/CVE-2024-42486.json +Cilium is a networking, observability, and security solution with an eBPF-based dataplane. 
In versions on the 1.15.x branch prior to 1.15.8 and the 1.16.x branch prior to 1.16.1, ReferenceGrant changes are not correctly propagated in Cilium's GatewayAPI controller, which could lead to Gateway resources being able to access secrets for longer than intended, or to Routes having the ability to forward traffic to backends in other namespaces for longer than intended. This issue has been patched in Cilium v1.15.8 and v1.16.1. As a workaround, any modification of a related Gateway/HTTPRoute/GRPCRoute/TCPRoute CRD (for example, adding any label to any of these resources) will trigger a reconciliation of ReferenceGrants on an affected cluster. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 namespace 和容器隔离相关。问题涉及 Cilium 的 GatewayAPI 控制器在处理 ReferenceGrant 变更时的行为异常,可能导致跨命名空间(namespace)的流量转发权限超出预期范围。 + +2. **程序漏洞分析**: + - **程序**:这是 Cilium 的漏洞,Cilium 是一个基于 eBPF 的容器网络解决方案。 + - **漏洞发生原因**:Cilium 的 GatewayAPI 控制器未能正确传播 ReferenceGrant 的变更,导致 Gateway 资源可能访问秘密(secrets)的时间超过预期,或者路由规则可能允许流量转发到其他命名空间中的后端服务。 + - **效果**:此漏洞破坏了容器环境中的命名空间隔离性,攻击者可能利用这一问题访问不应访问的秘密或转发流量到未经授权的目标,从而影响容器之间的隔离性和安全性。 + +cve: ./data/2024/43xxx/CVE-2024-43803.json +The Bare Metal Operator (BMO) implements a Kubernetes API for managing bare metal hosts in Metal3. The `BareMetalHost` (BMH) CRD allows the `userData`, `metaData`, and `networkData` for the provisioned host to be specified as links to Kubernetes Secrets. There are fields for both the `Name` and `Namespace` of the Secret, meaning that versions of the baremetal-operator prior to 0.8.0, 0.6.2, and 0.5.2 will read a `Secret` from any namespace. A user with access to create or edit a `BareMetalHost` can thus exfiltrate a `Secret` from another namespace by using it as e.g. the `userData` for provisioning some host (note that this need not be a real host, it could be a VM somewhere). + +BMO will only read a key with the name `value` (or `userData`, `metaData`, or `networkData`), so that limits the exposure somewhat. 
`value` is probably a pretty common key though. Secrets used by _other_ `BareMetalHost`s in different namespaces are always vulnerable. It is probably relatively unusual for anyone other than cluster administrators to have RBAC access to create/edit a `BareMetalHost`. This vulnerability is only meaningful, if the cluster has users other than administrators and users' privileges are limited to their respective namespaces. + +The patch prevents BMO from accepting links to Secrets from other namespaces as BMH input. Any BMH configuration is only read from the same namespace only. The problem is patched in BMO releases v0.7.0, v0.6.2 and v0.5.2 and users should upgrade to those versions. Prior upgrading, duplicate the BMC Secrets to the namespace where the corresponding BMH is. After upgrade, remove the old Secrets. As a workaround, an operator can configure BMO RBAC to be namespace scoped for Secrets, instead of cluster scoped, to prevent BMO from accessing Secrets from other namespaces. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 namespace 和隔离相关。问题的核心在于 Bare Metal Operator (BMO) 在处理 `Secret` 时,未正确限制对其他 namespace 的访问,导致跨 namespace 的数据泄露风险。 + +2. **这是什么程序的漏洞**: + 这是 **Bare Metal Operator (BMO)** 的漏洞,BMO 是一个用于管理裸金属主机的 Kubernetes API 实现。漏洞发生在 BMO 处理 `BareMetalHost` CRD 的逻辑中,具体来说,它允许用户通过指定 `Secret` 的名称和 namespace 来引用 Kubernetes Secret,并且在修复前,BMO 并未限制只能访问同一 namespace 内的 Secret。 + +3. 
**漏洞如何发生及效果**: + - 漏洞发生的原因是 BMO 在处理 `userData`, `metaData`, 或 `networkData` 字段时,允许引用任意 namespace 中的 Kubernetes Secret。 + - 攻击者如果拥有创建或编辑 `BareMetalHost` 的权限,可以利用此漏洞访问其他 namespace 中的 Secret 数据,即使这些 Secret 不属于攻击者的权限范围。 + - 效果是导致跨 namespace 的数据泄露,破坏了 Kubernetes 的 namespace 隔离机制。特别是当集群中有非管理员用户,且用户的权限被限制在特定 namespace 时,这种漏洞可能被恶意利用来获取敏感信息。 + +cve: ./data/2024/43xxx/CVE-2024-43853.json +In the Linux kernel, the following vulnerability has been resolved: + +cgroup/cpuset: Prevent UAF in proc_cpuset_show() + +An UAF can happen when /proc/cpuset is read as reported in [1]. + +This can be reproduced by the following methods: +1.add an mdelay(1000) before acquiring the cgroup_lock In the + cgroup_path_ns function. +2.$cat /proc//cpuset repeatly. +3.$mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset/ +$umount /sys/fs/cgroup/cpuset/ repeatly. + +The race that cause this bug can be shown as below: + +(umount) | (cat /proc//cpuset) +css_release | proc_cpuset_show +css_release_work_fn | css = task_get_css(tsk, cpuset_cgrp_id); +css_free_rwork_fn | cgroup_path_ns(css->cgroup, ...); +cgroup_destroy_root | mutex_lock(&cgroup_mutex); +rebind_subsystems | +cgroup_free_root | + | // cgrp was freed, UAF + | cgroup_path_ns_locked(cgrp,..); + +When the cpuset is initialized, the root node top_cpuset.css.cgrp +will point to &cgrp_dfl_root.cgrp. In cgroup v1, the mount operation will +allocate cgroup_root, and top_cpuset.css.cgrp will point to the allocated +&cgroup_root.cgrp. When the umount operation is executed, +top_cpuset.css.cgrp will be rebound to &cgrp_dfl_root.cgrp. + +The problem is that when rebinding to cgrp_dfl_root, there are cases +where the cgroup_root allocated by setting up the root for cgroup v1 +is cached. This could lead to a Use-After-Free (UAF) if it is +subsequently freed. The descendant cgroups of cgroup v1 can only be +freed after the css is released. However, the css of the root will never +be released, yet the cgroup_root should be freed when it is unmounted. 
+This means that obtaining a reference to the css of the root does +not guarantee that css.cgrp->root will not be freed. + +Fix this problem by using rcu_read_lock in proc_cpuset_show(). +As cgroup_root is kfree_rcu after commit d23b5c577715 +("cgroup: Make operations on the cgroup root_list RCU safe"), +css->cgroup won't be freed during the critical section. +To call cgroup_path_ns_locked, css_set_lock is needed, so it is safe to +replace task_get_css with task_css. + +[1] https://syzkaller.appspot.com/bug?extid=9b1ff7be974a403aa4cd + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与cgroup直接相关,具体涉及cpuset子系统。cgroup是Linux内核中用于资源限制和隔离的重要机制,广泛应用于容器技术(如Docker、Kubernetes)中以实现资源分配和隔离。 + +2. **程序漏洞分析**: + - **程序**:这是Linux内核中的漏洞。 + - **漏洞发生位置**:漏洞发生在`proc_cpuset_show()`函数中,当读取`/proc//cpuset`时可能会触发Use-After-Free (UAF)问题。 + - **漏洞原因**:在cgroup v1的挂载和卸载操作过程中,存在竞争条件(race condition)。具体来说,当执行`umount`操作时,`top_cpuset.css.cgrp`会被重新绑定到`&cgrp_dfl_root.cgrp`,但在此之前分配的`cgroup_root`可能仍然被缓存。如果随后释放了这个缓存的`cgroup_root`,而此时仍有对`css.cgroup->root`的引用,就会导致UAF问题。 + - **效果**:攻击者可能利用此漏洞导致系统崩溃(内核恐慌),或在某些情况下可能进一步利用UAF来执行任意代码,从而破坏系统的完整性和可用性。 + +3. **修复措施**: + 通过在`proc_cpuset_show()`中使用`rcu_read_lock()`,确保在关键代码段内`css->cgroup`不会被释放。此外,用`task_css`替代`task_get_css`以避免潜在的竞争条件。 + +cve: ./data/2024/43xxx/CVE-2024-43888.json +In the Linux kernel, the following vulnerability has been resolved: + +mm: list_lru: fix UAF for memory cgroup + +The mem_cgroup_from_slab_obj() is supposed to be called under rcu lock or +cgroup_mutex or others which could prevent returned memcg from being +freed. Fix it by adding missing rcu read lock. + +Found by code inspection. + +[songmuchun@bytedance.com: only grab rcu lock when necessary, per Vlastimil] + +analysis: 1. 这个CVE信息与cgroup相关。 +2. 
这是Linux内核的漏洞。该漏洞发生在内存控制组(memory cgroup)的实现中,具体是在`mem_cgroup_from_slab_obj()`函数的调用过程中。由于缺少必要的RCU读锁(rcu read lock),可能导致使用后释放(Use-After-Free, UAF)问题。攻击者可能利用此漏洞破坏内存隔离,进而影响系统的稳定性和安全性。 + +cve: ./data/2024/43xxx/CVE-2024-43892.json +In the Linux kernel, the following vulnerability has been resolved: + +memcg: protect concurrent access to mem_cgroup_idr + +Commit 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after +many small jobs") decoupled the memcg IDs from the CSS ID space to fix the +cgroup creation failures. It introduced IDR to maintain the memcg ID +space. The IDR depends on external synchronization mechanisms for +modifications. For the mem_cgroup_idr, the idr_alloc() and idr_replace() +happen within css callback and thus are protected through cgroup_mutex +from concurrent modifications. However idr_remove() for mem_cgroup_idr +was not protected against concurrency and can be run concurrently for +different memcgs when they hit their refcnt to zero. Fix that. + +We have been seeing list_lru based kernel crashes at a low frequency in +our fleet for a long time. These crashes were in different part of +list_lru code including list_lru_add(), list_lru_del() and reparenting +code. Upon further inspection, it looked like for a given object (dentry +and inode), the super_block's list_lru didn't have list_lru_one for the +memcg of that object. The initial suspicions were either the object is +not allocated through kmem_cache_alloc_lru() or somehow +memcg_list_lru_alloc() failed to allocate list_lru_one() for a memcg but +returned success. No evidence were found for these cases. + +Looking more deeply, we started seeing situations where valid memcg's id +is not present in mem_cgroup_idr and in some cases multiple valid memcgs +have same id and mem_cgroup_idr is pointing to one of them. 
So, the most +reasonable explanation is that these situations can happen due to race +between multiple idr_remove() calls or race between +idr_alloc()/idr_replace() and idr_remove(). These races are causing +multiple memcgs to acquire the same ID and then offlining of one of them +would cleanup list_lrus on the system for all of them. Later access from +other memcgs to the list_lru cause crashes due to missing list_lru_one. + +analysis: ### 分析结果: + +1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与 cgroup(控制组)密切相关。具体来说,问题出现在内存控制组(memcg)的实现中,涉及 `mem_cgroup_idr` 的并发访问保护不足。 + +2. **漏洞所属程序及影响:** + - **程序:** Linux 内核(Kernel)。 + - **漏洞原因:** 在引入 IDR(ID 管理器)以维护 memcg ID 空间时,`idr_remove()` 操作未受到适当的同步保护,导致在不同 memcg 的引用计数降为零时可能发生并发调用。这种竞争条件使得多个 memcg 可能分配到相同的 ID,进而导致系统清理某个 memcg 的资源时,影响到其他 memcg 的资源(如 `list_lru`),最终引发内核崩溃。 + - **效果:** 此漏洞可能导致基于 `list_lru` 的内核崩溃,尤其是在处理 dentry 和 inode 对象时。这会影响系统的稳定性和可靠性,尤其是在大规模部署的环境中。 + +3. **总结:** + 该 CVE 是 Linux 内核中与 cgroup 相关的一个漏洞,主要由于 memcg ID 管理中的并发访问问题导致内核崩溃。它与容器隔离机制间接相关,因为 cgroup 是容器技术(如 Docker)实现资源限制和隔离的核心组件之一。 + +cve: ./data/2024/44xxx/CVE-2024-44975.json +In the Linux kernel, the following vulnerability has been resolved: + +cgroup/cpuset: fix panic caused by partcmd_update + +We find a bug as below: +BUG: unable to handle page fault for address: 00000003 +PGD 0 P4D 0 +Oops: 0000 [#1] PREEMPT SMP NOPTI +CPU: 3 PID: 358 Comm: bash Tainted: G W I 6.6.0-10893-g60d6 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/4 +RIP: 0010:partition_sched_domains_locked+0x483/0x600 +Code: 01 48 85 d2 74 0d 48 83 05 29 3f f8 03 01 f3 48 0f bc c2 89 c0 48 9 +RSP: 0018:ffffc90000fdbc58 EFLAGS: 00000202 +RAX: 0000000100000003 RBX: ffff888100b3dfa0 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000002fe80 +RBP: ffff888100b3dfb0 R08: 0000000000000001 R09: 0000000000000000 +R10: ffffc90000fdbcb0 R11: 0000000000000004 R12: 0000000000000002 +R13: ffff888100a92b48 R14: 0000000000000000 R15: 
0000000000000000 +FS: 00007f44a5425740(0000) GS:ffff888237d80000(0000) knlGS:0000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000100030973 CR3: 000000010722c000 CR4: 00000000000006e0 +Call Trace: + + ? show_regs+0x8c/0xa0 + ? __die_body+0x23/0xa0 + ? __die+0x3a/0x50 + ? page_fault_oops+0x1d2/0x5c0 + ? partition_sched_domains_locked+0x483/0x600 + ? search_module_extables+0x2a/0xb0 + ? search_exception_tables+0x67/0x90 + ? kernelmode_fixup_or_oops+0x144/0x1b0 + ? __bad_area_nosemaphore+0x211/0x360 + ? up_read+0x3b/0x50 + ? bad_area_nosemaphore+0x1a/0x30 + ? exc_page_fault+0x890/0xd90 + ? __lock_acquire.constprop.0+0x24f/0x8d0 + ? __lock_acquire.constprop.0+0x24f/0x8d0 + ? asm_exc_page_fault+0x26/0x30 + ? partition_sched_domains_locked+0x483/0x600 + ? partition_sched_domains_locked+0xf0/0x600 + rebuild_sched_domains_locked+0x806/0xdc0 + update_partition_sd_lb+0x118/0x130 + cpuset_write_resmask+0xffc/0x1420 + cgroup_file_write+0xb2/0x290 + kernfs_fop_write_iter+0x194/0x290 + new_sync_write+0xeb/0x160 + vfs_write+0x16f/0x1d0 + ksys_write+0x81/0x180 + __x64_sys_write+0x21/0x30 + x64_sys_call+0x2f25/0x4630 + do_syscall_64+0x44/0xb0 + entry_SYSCALL_64_after_hwframe+0x78/0xe2 +RIP: 0033:0x7f44a553c887 + +It can be reproduced with cammands: +cd /sys/fs/cgroup/ +mkdir test +cd test/ +echo +cpuset > ../cgroup.subtree_control +echo root > cpuset.cpus.partition +cat /sys/fs/cgroup/cpuset.cpus.effective +0-3 +echo 0-3 > cpuset.cpus // taking away all cpus from root + +This issue is caused by the incorrect rebuilding of scheduling domains. +In this scenario, test/cpuset.cpus.partition should be an invalid root +and should not trigger the rebuilding of scheduling domains. When calling +update_parent_effective_cpumask with partcmd_update, if newmask is not +null, it should recheck newmask whether there are cpus is available +for parect/cs that has tasks. + +analysis: 1. 
**是否与 namespace、cgroup、container 或者容器、隔离相关** + 是的,这个 CVE 与 cgroup 和隔离机制密切相关。具体来说,问题出现在 Linux 内核的 cgroup(控制组)子系统中,特别是 cpuset 功能部分。cpuset 用于限制进程可以使用的 CPU 和内存节点,是实现资源隔离的重要组成部分。 + +2. **这是什么程序的漏洞?如何发生?有何效果?** + - **程序**:这是一个 Linux 内核(Kernel)的漏洞,具体涉及 cgroup 的 cpuset 子系统。 + - **漏洞发生过程**:当用户通过 sysfs 接口操作 cgroup 的 cpuset 属性时,如果尝试将根分区的所有 CPU 资源移除(例如 `echo 0-3 > cpuset.cpus`),内核会错误地触发调度域(scheduling domains)的重建。然而,在这种情况下,`test/cpuset.cpus.partition` 应该是一个无效的根分区,不应该触发调度域的重建。由于内核没有正确检查新掩码(newmask)是否仍然有可用的 CPU 资源,最终导致内核崩溃(kernel panic)。 + - **影响后果**:攻击者可以通过特定的 sysfs 操作(如上述命令序列)触发内核崩溃,从而导致系统不可用(拒绝服务攻击,Denial of Service, DoS)。在容器化环境中,如果容器拥有对宿主机 cgroup 系统的访问权限,可能会利用此漏洞影响整个宿主机系统的稳定性。 + +cve: ./data/2024/44xxx/CVE-2024-44991.json +In the Linux kernel, the following vulnerability has been resolved: + +tcp: prevent concurrent execution of tcp_sk_exit_batch + +Its possible that two threads call tcp_sk_exit_batch() concurrently, +once from the cleanup_net workqueue, once from a task that failed to clone +a new netns. In the latter case, error unwinding calls the exit handlers +in reverse order for the 'failed' netns. + +tcp_sk_exit_batch() calls tcp_twsk_purge(). +Problem is that since commit b099ce2602d8 ("net: Batch inet_twsk_purge"), +this function picks up twsk in any dying netns, not just the one passed +in via exit_batch list. + +This means that the error unwind of setup_net() can "steal" and destroy +timewait sockets belonging to the exiting netns. + +This allows the netns exit worker to proceed to call + +WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); + +without the expected 1 -> 0 transition, which then splats. + +At same time, error unwind path that is also running inet_twsk_purge() +will splat as well: + +WARNING: .. at lib/refcount.c:31 refcount_warn_saturate+0x1ed/0x210 +... 
+ refcount_dec include/linux/refcount.h:351 [inline] + inet_twsk_kill+0x758/0x9c0 net/ipv4/inet_timewait_sock.c:70 + inet_twsk_deschedule_put net/ipv4/inet_timewait_sock.c:221 + inet_twsk_purge+0x725/0x890 net/ipv4/inet_timewait_sock.c:304 + tcp_sk_exit_batch+0x1c/0x170 net/ipv4/tcp_ipv4.c:3522 + ops_exit_list+0x128/0x180 net/core/net_namespace.c:178 + setup_net+0x714/0xb40 net/core/net_namespace.c:375 + copy_net_ns+0x2f0/0x670 net/core/net_namespace.c:508 + create_new_namespaces+0x3ea/0xb10 kernel/nsproxy.c:110 + +... because refcount_dec() of tw_refcount unexpectedly dropped to 0. + +This doesn't seem like an actual bug (no tw sockets got lost and I don't +see a use-after-free) but as erroneous trigger of debug check. + +Add a mutex to force strict ordering: the task that calls tcp_twsk_purge() +blocks other task from doing final _dec_and_test before mutex-owner has +removed all tw sockets of dying netns. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,这个 CVE 与 namespace 相关。具体来说,它涉及网络命名空间(netns)的退出和销毁过程中的竞态条件问题。 + +2. **程序漏洞分析:** + - **这是什么程序的漏洞:** + 这是 Linux 内核(Kernel)中的一个漏洞。 + - **漏洞如何发生:** + 在处理网络命名空间(netns)退出的过程中,`tcp_sk_exit_batch()` 函数可能会被两个线程并发调用:一个是来自 `cleanup_net` 工作队列,另一个是由于克隆新的 netns 失败而触发的错误回滚路径。这种并发执行会导致 `tcp_twsk_purge()` 错误地操作属于其他正在退出的 netns 的时间等待(timewait)套接字。 + - **漏洞效果:** + 该漏洞可能导致以下后果: + - 在 netns 退出过程中,`WARN_ON_ONCE` 检查失败,因为引用计数器(refcount)未能按预期从 1 转变为 0。 + - 错误回滚路径上的 `inet_twsk_purge()` 可能会触发警告(WARNING),因为引用计数器意外下降到 0。 + - 尽管没有实际的套接字丢失或 use-after-free 问题,但调试检查会被错误触发,可能影响系统的稳定性和日志的准确性。 + +总结:这是一个 Linux 内核中与网络命名空间相关的竞态条件漏洞,主要影响 netns 的退出和销毁过程。 + +cve: ./data/2024/45xxx/CVE-2024-45310.json +runc is a CLI tool for spawning and running containers according to the OCI specification. runc 1.1.13 and earlier, as well as 1.2.0-rc2 and earlier, can be tricked into creating empty files or directories in arbitrary locations in the host filesystem by sharing a volume between two containers and exploiting a race with `os.MkdirAll`. 
While this could be used to create empty files, existing files would not be truncated. An attacker must have the ability to start containers using some kind of custom volume configuration. Containers using user namespaces are still affected, but the scope of places an attacker can create inodes can be significantly reduced. Sufficiently strict LSM policies (SELinux/Apparmor) can also in principle block this attack -- we suspect the industry standard SELinux policy may restrict this attack's scope but the exact scope of protection hasn't been analysed. This is exploitable using runc directly as well as through Docker and Kubernetes. The issue is fixed in runc v1.1.14 and v1.2.0-rc3. + +Some workarounds are available. Using user namespaces restricts this attack fairly significantly such that the attacker can only create inodes in directories that the remapped root user/group has write access to. Unless the root user is remapped to an actual +user on the host (such as with rootless containers that don't use `/etc/sub[ug]id`), this in practice means that an attacker would only be able to create inodes in world-writable directories. A strict enough SELinux or AppArmor policy could in principle also restrict the scope if a specific label is applied to the runc runtime, though neither the extent to which the standard existing policies block this attack nor what exact policies are needed to sufficiently restrict this attack have been thoroughly tested. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与容器技术密切相关,特别是涉及 `runc`(一个 OCI 容器运行时工具)的行为。它还提到了用户命名空间(user namespaces)以及如何通过共享卷和竞争条件(race condition)在主机文件系统中创建空文件或目录。此外,CVE 提到容器隔离机制(如 SELinux 和 AppArmor)可能限制攻击范围。 + +2. 
**这是什么程序的漏洞?** + - **程序**:这是一个 `runc` 的漏洞,`runc` 是实现 OCI 标准的容器运行时工具,广泛用于 Docker 和 Kubernetes 等容器管理系统。 + - **漏洞发生原因**:攻击者可以通过配置两个容器共享同一个卷,并利用 `os.MkdirAll` 函数中的竞争条件,在主机文件系统的任意位置创建空文件或目录。 + - **效果**:虽然现有文件不会被截断,但攻击者可以在主机文件系统中创建新的空文件或目录,从而可能导致权限提升或破坏系统完整性。此问题在直接使用 `runc` 或通过 Docker 和 Kubernetes 使用时均可被利用。 + +3. **总结**: + 该漏洞与容器技术紧密相关,影响 `runc`,并通过共享卷和竞争条件在主机文件系统中创建文件或目录。使用用户命名空间或严格的 LSM 策略(如 SELinux/AppArmor)可以限制攻击范围。 + +cve: ./data/2024/45xxx/CVE-2024-45497.json +A flaw was found in the OpenShift build process, where the docker-build container is configured with a hostPath volume mount that maps the node's /var/lib/kubelet/config.json file into the build pod. This file contains sensitive credentials necessary for pulling images from private repositories. The mount is not read-only, which allows the attacker to overwrite it. By modifying the config.json file, the attacker can cause a denial of service by preventing the node from pulling new images and potentially exfiltrating sensitive secrets. This flaw impacts the availability of services dependent on image pulls and exposes sensitive information to unauthorized parties. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是OpenShift中与Docker容器实现相关的漏洞。具体来说: + - 漏洞发生在OpenShift的docker-build容器配置过程中。 + - 问题在于docker-build容器被错误地配置了一个hostPath卷挂载,将节点上的`/var/lib/kubelet/config.json`文件映射到构建Pod中。 + - 该挂载未设置为只读,导致攻击者可以覆盖此文件。 + - 攻击效果包括: + - 修改`config.json`文件后,阻止节点拉取新镜像,造成服务不可用(拒绝服务)。 + - 泄露包含私有仓库拉取凭证的敏感信息。 + +结论:这是一个容器编排平台(OpenShift)在配置Docker容器时的漏洞,涉及容器隔离机制的不当配置。 + +cve: ./data/2024/47xxx/CVE-2024-47742.json +In the Linux kernel, the following vulnerability has been resolved: + +firmware_loader: Block path traversal + +Most firmware names are hardcoded strings, or are constructed from fairly +constrained format strings where the dynamic parts are just some hex +numbers or such. 
+ +However, there are a couple codepaths in the kernel where firmware file +names contain string components that are passed through from a device or +semi-privileged userspace; the ones I could find (not counting interfaces +that require root privileges) are: + + - lpfc_sli4_request_firmware_update() seems to construct the firmware + filename from "ModelName", a string that was previously parsed out of + some descriptor ("Vital Product Data") in lpfc_fill_vpd() + - nfp_net_fw_find() seems to construct a firmware filename from a model + name coming from nfp_hwinfo_lookup(pf->hwinfo, "nffw.partno"), which I + think parses some descriptor that was read from the device. + (But this case likely isn't exploitable because the format string looks + like "netronome/nic_%s", and there shouldn't be any *folders* starting + with "netronome/nic_". The previous case was different because there, + the "%s" is *at the start* of the format string.) + - module_flash_fw_schedule() is reachable from the + ETHTOOL_MSG_MODULE_FW_FLASH_ACT netlink command, which is marked as + GENL_UNS_ADMIN_PERM (meaning CAP_NET_ADMIN inside a user namespace is + enough to pass the privilege check), and takes a userspace-provided + firmware name. + (But I think to reach this case, you need to have CAP_NET_ADMIN over a + network namespace that a special kind of ethernet device is mapped into, + so I think this is not a viable attack path in practice.) + +Fix it by rejecting any firmware names containing ".." path components. + +For what it's worth, I went looking and haven't found any USB device +drivers that use the firmware loader dangerously. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是。该漏洞涉及 `CAP_NET_ADMIN` 权限在用户命名空间(user namespace)中的使用,这与 Linux 容器的隔离机制密切相关,因为容器通常依赖用户命名空间来实现权限隔离。 + +2. 
**程序及漏洞分析:** + - **程序:** 这是 Linux 内核(Kernel)的漏洞。 + - **漏洞发生原因:** 漏洞存在于内核的固件加载器(firmware loader)中,某些代码路径允许从设备或半特权用户空间传递字符串组件以构造固件文件名。特别是 `module_flash_fw_schedule()` 函数可以通过 `ETHTOOL_MSG_MODULE_FW_FLASH_ACT` Netlink 命令被触发,而该命令只需要 `CAP_NET_ADMIN` 权限(在用户命名空间中即可满足)。如果用户提供的固件名称包含路径遍历字符(如 `..`),可能会导致意外的文件访问。 + - **效果:** 攻击者可能利用此漏洞进行路径遍历攻击,访问或覆盖系统上的其他文件,从而破坏系统的完整性和安全性。虽然描述中提到实际可利用性有限(需要特定网络设备映射到网络命名空间),但在容器环境中,这种漏洞可能被用来突破隔离限制,影响宿主系统或其他容器的安全性。 + +cve: ./data/2024/49xxx/CVE-2024-49974.json +In the Linux kernel, the following vulnerability has been resolved: + +NFSD: Limit the number of concurrent async COPY operations + +Nothing appears to limit the number of concurrent async COPY +operations that clients can start. In addition, AFAICT each async +COPY can copy an unlimited number of 4MB chunks, so can run for a +long time. Thus IMO async COPY can become a DoS vector. + +Add a restriction mechanism that bounds the number of concurrent +background COPY operations. Start simple and try to be fair -- this +patch implements a per-namespace limit. + +An async COPY request that occurs while this limit is exceeded gets +NFS4ERR_DELAY. The requesting client can choose to send the request +again after a delay or fall back to a traditional read/write style +copy. + +If there is need to make the mechanism more sophisticated, we can +visit that in future patches. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 信息与 namespace 相关。描述中明确提到实现了一个“per-namespace limit”机制,用于限制每个命名空间中并发的异步 COPY 操作数量。 + +2. 
**程序漏洞分析**: + - **程序**:这是 Linux 内核 (Kernel) 的漏洞。 + - **漏洞发生原因**:在 NFS(Network File System)服务中,没有对客户端可以发起的并发异步 COPY 操作数量进行限制。此外,每个异步 COPY 操作可以复制无限数量的 4MB 数据块,导致操作可能持续很长时间。这使得攻击者可以通过发起大量异步 COPY 操作,造成系统资源耗尽,从而形成拒绝服务 (DoS) 攻击。 + - **效果**:攻击者可以利用此漏洞通过消耗系统资源(如 CPU、内存等),使系统无法响应其他合法请求,导致拒绝服务。 + +总结:这是一个 Linux 内核中的漏洞,与 namespace 隔离机制相关,通过限制异步 COPY 操作的数量来修复潜在的 DoS 攻击问题。 + +cve: ./data/2024/50xxx/CVE-2024-50019.json +In the Linux kernel, the following vulnerability has been resolved: + +kthread: unpark only parked kthread + +Calling into kthread unparking unconditionally is mostly harmless when +the kthread is already unparked. The wake up is then simply ignored +because the target is not in TASK_PARKED state. + +However if the kthread is per CPU, the wake up is preceded by a call +to kthread_bind() which expects the task to be inactive and in +TASK_PARKED state, which obviously isn't the case if it is unparked. + +As a result, calling kthread_stop() on an unparked per-cpu kthread +triggers such a warning: + + WARNING: CPU: 0 PID: 11 at kernel/kthread.c:525 __kthread_bind_mask kernel/kthread.c:525 + + kthread_stop+0x17a/0x630 kernel/kthread.c:707 + destroy_workqueue+0x136/0xc40 kernel/workqueue.c:5810 + wg_destruct+0x1e2/0x2e0 drivers/net/wireguard/device.c:257 + netdev_run_todo+0xe1a/0x1000 net/core/dev.c:10693 + default_device_exit_batch+0xa14/0xa90 net/core/dev.c:11769 + ops_exit_list net/core/net_namespace.c:178 [inline] + cleanup_net+0x89d/0xcc0 net/core/net_namespace.c:640 + process_one_work kernel/workqueue.c:3231 [inline] + process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312 + worker_thread+0x86d/0xd70 kernel/workqueue.c:3393 + kthread+0x2f0/0x390 kernel/kthread.c:389 + ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 + + +Fix this with skipping unecessary unparking while stopping a kthread. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. 
**程序漏洞分析**: + 这是一个 Linux 内核 (Kernel) 的漏洞,具体发生在 `kthread`(内核线程)管理相关的代码中。问题的核心是当调用 `kthread_stop()` 停止一个已经处于未暂停状态的 per-CPU kthread 时,会触发警告。这是因为 `kthread_bind()` 函数期望任务处于非活动状态且为 `TASK_PARKED` 状态,而实际上任务并未暂停。 + + **漏洞如何发生**: + - 当尝试停止一个 per-CPU 的 kthread 时,如果该线程已经处于未暂停状态,`kthread_stop()` 仍会无条件地对其执行 unpark(解除暂停)操作,而 per-CPU kthread 在被唤醒之前会先调用 `kthread_bind()` 重新绑定 CPU。 + - 这种情况下,`kthread_bind()` 被调用时发现任务并非处于预期的 `TASK_PARKED` 状态,从而触发警告。 + + **效果**: + - 此漏洞会导致内核发出警告信息(如示例中的 `WARNING: CPU: 0 PID: 11...`),虽然不会直接导致系统崩溃,但可能影响系统的稳定性和调试体验。 + - 在极端情况下,这种警告可能会掩盖其他更重要的问题,增加排查难度。 + +3. **结论**: + N/A + +cve: ./data/2024/50xxx/CVE-2024-50066.json +In the Linux kernel, the following vulnerability has been resolved: + +mm/mremap: fix move_normal_pmd/retract_page_tables race + +In mremap(), move_page_tables() looks at the type of the PMD entry and the +specified address range to figure out by which method the next chunk of +page table entries should be moved. + +At that point, the mmap_lock is held in write mode, but no rmap locks are +held yet. For PMD entries that point to page tables and are fully covered +by the source address range, move_pgt_entry(NORMAL_PMD, ...) is called, +which first takes rmap locks, then does move_normal_pmd(). +move_normal_pmd() takes the necessary page table locks at source and +destination, then moves an entire page table from the source to the +destination. + +The problem is: The rmap locks, which protect against concurrent page +table removal by retract_page_tables() in the THP code, are only taken +after the PMD entry has been read and it has been decided how to move it. 
+So we can race as follows (with two processes that have mappings of the +same tmpfs file that is stored on a tmpfs mount with huge=advise); note +that process A accesses page tables through the MM while process B does it +through the file rmap: + +process A process B +========= ========= +mremap + mremap_to + move_vma + move_page_tables + get_old_pmd + alloc_new_pmd + *** PREEMPT *** + madvise(MADV_COLLAPSE) + do_madvise + madvise_walk_vmas + madvise_vma_behavior + madvise_collapse + hpage_collapse_scan_file + collapse_file + retract_page_tables + i_mmap_lock_read(mapping) + pmdp_collapse_flush + i_mmap_unlock_read(mapping) + move_pgt_entry(NORMAL_PMD, ...) + take_rmap_locks + move_normal_pmd + drop_rmap_locks + +When this happens, move_normal_pmd() can end up creating bogus PMD entries +in the line `pmd_populate(mm, new_pmd, pmd_pgtable(pmd))`. The effect +depends on arch-specific and machine-specific details; on x86, you can end +up with physical page 0 mapped as a page table, which is likely +exploitable for user->kernel privilege escalation. + +Fix the race by letting process B recheck that the PMD still points to a +page table after the rmap locks have been taken. Otherwise, we bail and +let the caller fall back to the PTE-level copying path, which will then +bail immediately at the pmd_none() check. + +Bug reachability: Reaching this bug requires that you can create +shmem/file THP mappings - anonymous THP uses different code that doesn't +zap stuff under rmap locks. File THP is gated on an experimental config +flag (CONFIG_READ_ONLY_THP_FOR_FS), so on normal distro kernels you need +shmem THP to hit this bug. As far as I know, getting shmem THP normally +requires that you can mount your own tmpfs with the right mount flags, +which would require creating your own user+mount namespace; though I don't +know if some distros maybe enable shmem THP by default or something like +that. 
+ +Bug impact: This issue can likely be used for user->kernel privilege +escalation when it is reachable. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是。虽然漏洞的核心是内核中的 `mremap()` 函数竞态问题,但触发此漏洞需要创建 shmem/file THP 映射,这通常需要挂载一个带有特定标志的 tmpfs。而挂载 tmpfs 通常需要使用用户命名空间(user namespace)和挂载命名空间(mount namespace),这些功能在容器技术(如 Docker 和 Kubernetes)中广泛使用。因此,该漏洞可能与容器环境中的隔离性有关。 + +2. **程序漏洞分析:** + - **程序类型:** Linux 内核(Kernel)。 + - **漏洞发生原因:** 在 `mremap()` 函数中,`move_page_tables()` 检查 PMD 条目类型时,未正确处理与 `retract_page_tables()` 的竞态条件。具体来说,在读取 PMD 条目后但在获取 rmap 锁之前,另一个进程可能通过 `madvise(MADV_COLLAPSE)` 修改了 PMD 条目,导致 `move_normal_pmd()` 创建了无效的 PMD 条目。 + - **漏洞效果:** 此漏洞可能导致用户态到内核态的权限提升(privilege escalation)。在 x86 架构上,可能会将物理页 0 映射为页表,从而被攻击者利用来执行恶意操作。 + +总结:这是一个 Linux 内核中的竞态条件漏洞,可能在容器环境中被利用,特别是当容器允许挂载 tmpfs 或使用特定配置时,会影响隔离性并导致权限提升。 + +cve: ./data/2024/50xxx/CVE-2024-50130.json +In the Linux kernel, the following vulnerability has been resolved: + +netfilter: bpf: must hold reference on net namespace + +BUG: KASAN: slab-use-after-free in __nf_unregister_net_hook+0x640/0x6b0 +Read of size 8 at addr ffff8880106fe400 by task repro/72= +bpf_nf_link_release+0xda/0x1e0 +bpf_link_free+0x139/0x2d0 +bpf_link_release+0x68/0x80 +__fput+0x414/0xb60 + +Eric says: + It seems that bpf was able to defer the __nf_unregister_net_hook() + after exit()/close() time. + Perhaps a netns reference is missing, because the netns has been + dismantled/freed already. + bpf_nf_link_attach() does : + link->net = net; + But I do not see a reference being taken on net. + +Add such a reference and release it after hook unreg. +Note that I was unable to get syzbot reproducer to work, so I +do not know if this resolves this splat. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,这个CVE与namespace相关,具体涉及网络命名空间(net namespace)。 + +2. 
**程序漏洞分析:** + - **程序类型:** 这是Linux内核(Kernel)中的漏洞。 + - **漏洞发生原因:** 该漏洞出现在netfilter和BPF(Berkeley Packet Filter)的交互中。问题的核心在于,在使用BPF程序附加到网络命名空间时,代码没有正确持有对网络命名空间的引用计数(reference count)。当网络命名空间被销毁或释放后,BPF程序可能仍然尝试访问已释放的内存区域,从而导致“use-after-free”错误。 + - **漏洞效果:** 这种情况可能导致内核崩溃(kernel panic)或信息泄漏,攻击者可能利用此漏洞破坏系统的稳定性或进一步提升权限。在容器环境中,这种漏洞可能会影响基于网络命名空间的隔离性,使得恶意容器能够影响宿主机或其他容器的网络功能。 + +cve: ./data/2024/50xxx/CVE-2024-50204.json +In the Linux kernel, the following vulnerability has been resolved: + +fs: don't try and remove empty rbtree node + +When copying a namespace we won't have added the new copy into the +namespace rbtree until after the copy succeeded. Calling free_mnt_ns() +will try to remove the copy from the rbtree which is invalid. Simply +free the namespace skeleton directly. + +analysis: 1. 该CVE信息与namespace相关。 + +2. 这是Linux内核的漏洞。该漏洞发生在处理文件系统命名空间(namespace)时,具体是在复制命名空间的过程中。在复制成功之前,新的副本不会被加入到命名空间的红黑树(rbtree)中,但是`free_mnt_ns()`函数会尝试从红黑树中移除这个副本,这是无效的操作。此漏洞可能导致内存损坏或系统崩溃,影响系统的稳定性和安全性。 + +cve: ./data/2024/50xxx/CVE-2024-50235.json +In the Linux kernel, the following vulnerability has been resolved: + +wifi: cfg80211: clear wdev->cqm_config pointer on free + +When we free wdev->cqm_config when unregistering, we also +need to clear out the pointer since the same wdev/netdev +may get re-registered in another network namespace, then +destroyed later, running this code again, which results in +a double-free. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个 CVE 与 namespace 相关。问题描述中提到 `wdev/netdev` 可能在另一个网络命名空间(network namespace)中重新注册,这表明该漏洞涉及 Linux 的网络命名空间功能,而网络命名空间是容器隔离机制的重要组成部分。 + +2. 
**程序的漏洞分析**: + - **程序**:这是 Linux 内核(Kernel)中的漏洞。 + - **漏洞发生原因**:在注销无线设备并释放 `wdev->cqm_config` 时,内核没有将该指针清空,而该 `wdev/netdev` 对象可能在另一个网络命名空间中被重新注册并最终销毁。当再次运行相关代码时,会导致双重释放(double-free)的问题。 + - **影响后果**:双重释放可能导致内存损坏、系统崩溃或潜在的安全风险(如任意代码执行)。这种问题在容器环境中尤其危险,因为它可能破坏容器之间的隔离性,甚至影响宿主机的稳定性。 + +cve: ./data/2024/53xxx/CVE-2024-53079.json +In the Linux kernel, the following vulnerability has been resolved: + +mm/thp: fix deferred split unqueue naming and locking + +Recent changes are putting more pressure on THP deferred split queues: +under load revealing long-standing races, causing list_del corruptions, +"Bad page state"s and worse (I keep BUGs in both of those, so usually +don't get to see how badly they end up without). The relevant recent +changes being 6.8's mTHP, 6.10's mTHP swapout, and 6.12's mTHP swapin, +improved swap allocation, and underused THP splitting. + +Before fixing locking: rename misleading folio_undo_large_rmappable(), +which does not undo large_rmappable, to folio_unqueue_deferred_split(), +which is what it does. But that and its out-of-line __callee are mm +internals of very limited usability: add comment and WARN_ON_ONCEs to +check usage; and return a bool to say if a deferred split was unqueued, +which can then be used in WARN_ON_ONCEs around safety checks (sparing +callers the arcane conditionals in __folio_unqueue_deferred_split()). + +Just omit the folio_unqueue_deferred_split() from free_unref_folios(), all +of whose callers now call it beforehand (and if any forget then bad_page() +will tell) - except for its caller put_pages_list(), which itself no +longer has any callers (and will be deleted separately). 
+ +Swapout: mem_cgroup_swapout() has been resetting folio->memcg_data 0 +without checking and unqueueing a THP folio from deferred split list; +which is unfortunate, since the split_queue_lock depends on the memcg +(when memcg is enabled); so swapout has been unqueueing such THPs later, +when freeing the folio, using the pgdat's lock instead: potentially +corrupting the memcg's list. __remove_mapping() has frozen refcount to 0 +here, so no problem with calling folio_unqueue_deferred_split() before +resetting memcg_data. + +That goes back to 5.4 commit 87eaceb3faa5 ("mm: thp: make deferred split +shrinker memcg aware"): which included a check on swapcache before adding +to deferred queue, but no check on deferred queue before adding THP to +swapcache. That worked fine with the usual sequence of events in reclaim +(though there were a couple of rare ways in which a THP on deferred queue +could have been swapped out), but 6.12 commit dafff3f4c850 ("mm: split +underused THPs") avoids splitting underused THPs in reclaim, which makes +swapcache THPs on deferred queue commonplace. + +Keep the check on swapcache before adding to deferred queue? Yes: it is +no longer essential, but preserves the existing behaviour, and is likely +to be a worthwhile optimization (vmstat showed much more traffic on the +queue under swapping load if the check was removed); update its comment. + +Memcg-v1 move (deprecated): mem_cgroup_move_account() has been changing +folio->memcg_data without checking and unqueueing a THP folio from the +deferred list, sometimes corrupting "from" memcg's list, like swapout. +Refcount is non-zero here, so folio_unqueue_deferred_split() can only be +used in a WARN_ON_ONCE to validate the fix, which must be done earlier: +mem_cgroup_move_charge_pte_range() first try to split the THP (splitting +of course unqueues), or skip it if that fails. 
Not ideal, but moving +charge has been requested, and khugepaged should repair the THP later: +nobody wants new custom unqueueing code just for this deprecated case. + +The 87eaceb3faa5 commit did have the code to move from one deferred list +to another (but was not conscious of its unsafety while refcount non-0); +but that was removed by 5.6 commit fac0516b5534 ("mm: thp: don't need care +deferred split queue in memcg charge move path"), which argued that the +existence of a PMD mapping guarantees that the THP cannot be on a deferred +list. As above, false in rare cases, and now commonly false. + +Backport to 6.11 should be straightforward. Earlier backports must take +care that other _deferred_list fixes and dependencies are included. There +is not a strong case for backports, but they can fix cornercases. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,该漏洞与 `cgroup` 和内存管理相关。具体来说,问题涉及 `mem_cgroup_swapout()` 和 `mem_cgroup_move_account()` 的实现,这些函数与控制组(cgroup)中的内存控制器直接相关。此外,问题还涉及到 `THP (Transparent Huge Pages)` 的延迟拆分队列在多内存控制组环境下的正确性。 + +2. **这是什么程序的漏洞:** + 该漏洞发生在 **Linux 内核 (Kernel)** 中,具体是内存管理子系统中的透明大页(THP)功能部分。 + + **漏洞发生原因:** + - 在 `mem_cgroup_swapout()` 函数中,内核直接将 `folio->memcg_data` 重置为 0,却没有先检查该 THP folio 是否仍在延迟拆分队列中并将其移出队列。由于启用内存控制组(memcg)时延迟拆分队列的锁取决于 folio 所属的 memcg,之后在释放 folio 时只能改用 pgdat 的锁来出队,从而可能破坏该 memcg 的队列链表。 + - 另外,在内存控制组版本 1 的移动操作(`mem_cgroup_move_account()`)中,同样存在类似的问题,即在修改 folio 的 memcg 数据时,没有正确处理延迟拆分队列的状态,可能破坏源 memcg 的列表。 + + **漏洞效果:** + - 可能导致内核崩溃("Bad page state" 或其他 BUG 触发)。 + - 在高负载情况下,可能引发内存管理子系统的数据损坏或竞争条件,进一步影响系统的稳定性和性能。 + - 对于使用 cgroup 进行资源隔离的场景(例如容器环境),这种问题可能会破坏容器之间的内存隔离,导致不可预测的行为。 + +cve: ./data/2024/53xxx/CVE-2024-53095.json +In the Linux kernel, the following vulnerability has been resolved: + +smb: client: Fix use-after-free of network namespace. + +Recently, we got a customer report that CIFS triggers oops while +reconnecting to a server. [0] + +The workload runs on Kubernetes, and some pods mount CIFS servers +in non-root network namespaces. 
The problem rarely happened, but +it was always while the pod was dying. + +The root cause is wrong reference counting for network namespace. + +CIFS uses kernel sockets, which do not hold refcnt of the netns that +the socket belongs to. That means CIFS must ensure the socket is +always freed before its netns; otherwise, use-after-free happens. + +The repro steps are roughly: + + 1. mount CIFS in a non-root netns + 2. drop packets from the netns + 3. destroy the netns + 4. unmount CIFS + +We can reproduce the issue quickly with the script [1] below and see +the splat [2] if CONFIG_NET_NS_REFCNT_TRACKER is enabled. + +When the socket is TCP, it is hard to guarantee the netns lifetime +without holding refcnt due to async timers. + +Let's hold netns refcnt for each socket as done for SMC in commit +9744d2bf1976 ("smc: Fix use-after-free in tcp_write_timer_handler()."). + +Note that we need to move put_net() from cifs_put_tcp_session() to +clean_demultiplex_info(); otherwise, __sock_create() still could touch a +freed netns while cifsd tries to reconnect from cifs_demultiplex_thread(). + +Also, maybe_get_net() cannot be put just before __sock_create() because +the code is not under RCU and there is a small chance that the same +address happened to be reallocated to another netns. + +[0]: +CIFS: VFS: \\XXXXXXXXXXX has not responded in 15 seconds. Reconnecting... 
+CIFS: Serverclose failed 4 times, giving up +Unable to handle kernel paging request at virtual address 14de99e461f84a07 +Mem abort info: + ESR = 0x0000000096000004 + EC = 0x25: DABT (current EL), IL = 32 bits + SET = 0, FnV = 0 + EA = 0, S1PTW = 0 + FSC = 0x04: level 0 translation fault +Data abort info: + ISV = 0, ISS = 0x00000004 + CM = 0, WnR = 0 +[14de99e461f84a07] address between user and kernel address ranges +Internal error: Oops: 0000000096000004 [#1] SMP +Modules linked in: cls_bpf sch_ingress nls_utf8 cifs cifs_arc4 cifs_md4 dns_resolver tcp_diag inet_diag veth xt_state xt_connmark nf_conntrack_netlink xt_nat xt_statistic xt_MASQUERADE xt_mark xt_addrtype ipt_REJECT nf_reject_ipv4 nft_chain_nat nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_comment nft_compat nf_tables nfnetlink overlay nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sm4_ce_cipher sm4 sm3_ce sm3 sha3_ce sha512_ce sha512_arm64 sha1_ce ena button sch_fq_codel loop fuse configfs dmi_sysfs sha2_ce sha256_arm64 dm_mirror dm_region_hash dm_log dm_mod dax efivarfs +CPU: 5 PID: 2690970 Comm: cifsd Not tainted 6.1.103-109.184.amzn2023.aarch64 #1 +Hardware name: Amazon EC2 r7g.4xlarge/, BIOS 1.0 11/1/2018 +pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +pc : fib_rules_lookup+0x44/0x238 +lr : __fib_lookup+0x64/0xbc +sp : ffff8000265db790 +x29: ffff8000265db790 x28: 0000000000000000 x27: 000000000000bd01 +x26: 0000000000000000 x25: ffff000b4baf8000 x24: ffff00047b5e4580 +x23: ffff8000265db7e0 x22: 0000000000000000 x21: ffff00047b5e4500 +x20: ffff0010e3f694f8 x19: 14de99e461f849f7 x18: 0000000000000000 +x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 +x14: 0000000000000000 x13: 0000000000000000 x12: 3f92800abd010002 +x11: 0000000000000001 x10: ffff0010e3f69420 x9 : ffff800008a6f294 +x8 : 0000000000000000 x7 : 0000000000000006 x6 : 0000000000000000 +x5 : 0000000000000001 x4 : ffff001924354280 x3 : ffff8000265db7e0 +x2 : 
0000000000000000 x1 : ffff0010e3f694f8 x0 : ffff00047b5e4500 +Call trace: + fib_rules_lookup+0x44/0x238 + __fib_lookup+0x64/0xbc + ip_route_output_key_hash_rcu+0x2c4/0x398 + ip_route_output_key_hash+0x60/0x8c + tcp_v4_connect+0x290/0x488 + __inet_stream_connect+0x108/0x3d0 + inet_stream_connect+0x50/0x78 + kernel_connect+0x6c/0xac + generic_ip_conne +---truncated--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该 CVE 与 namespace 和容器隔离相关。具体来说,问题发生在非根网络命名空间(non-root network namespace)中,涉及 CIFS 文件系统在 Kubernetes 容器环境下的使用。 + +2. **这是什么程序的漏洞**: + - 漏洞存在于 **Linux 内核 (Kernel)** 中。 + - 漏洞发生的原因是 CIFS 使用了内核套接字(kernel sockets),而这些套接字没有正确引用计数网络命名空间(netns)。当网络命名空间被销毁时,CIFS 的套接字可能仍然存在,导致 "use-after-free" 问题。 + - 效果:在特定条件下(如容器正在终止时),可能导致内核崩溃(Oops),从而影响系统的稳定性。此问题在 Kubernetes 环境中尤为明显,因为容器通常运行在非根网络命名空间中。 + +总结:这是一个 Linux 内核中的漏洞,与网络命名空间的引用计数管理不当有关,影响容器环境中的 CIFS 使用场景。 + +cve: ./data/2024/53xxx/CVE-2024-53168.json +In the Linux kernel, the following vulnerability has been resolved: + +sunrpc: fix one UAF issue caused by sunrpc kernel tcp socket + +BUG: KASAN: slab-use-after-free in tcp_write_timer_handler+0x156/0x3e0 +Read of size 1 at addr ffff888111f322cd by task swapper/0/0 + +CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.0-rc4-dirty #7 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 +Call Trace: + + dump_stack_lvl+0x68/0xa0 + print_address_description.constprop.0+0x2c/0x3d0 + print_report+0xb4/0x270 + kasan_report+0xbd/0xf0 + tcp_write_timer_handler+0x156/0x3e0 + tcp_write_timer+0x66/0x170 + call_timer_fn+0xfb/0x1d0 + __run_timers+0x3f8/0x480 + run_timer_softirq+0x9b/0x100 + handle_softirqs+0x153/0x390 + __irq_exit_rcu+0x103/0x120 + irq_exit_rcu+0xe/0x20 + sysvec_apic_timer_interrupt+0x76/0x90 + + + asm_sysvec_apic_timer_interrupt+0x1a/0x20 +RIP: 0010:default_idle+0xf/0x20 +Code: 4c 01 c7 4c 29 c2 e9 72 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 + 90 90 90 90 f3 0f 1e fa 66 90 0f 00 2d 33 f8 25 00 fb f4 c3 cc cc cc + cc 66 66 2e 0f 
1f 84 00 00 00 00 00 90 90 90 90 90 +RSP: 0018:ffffffffa2007e28 EFLAGS: 00000242 +RAX: 00000000000f3b31 RBX: 1ffffffff4400fc7 RCX: ffffffffa09c3196 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff9f00590f +RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed102360835d +R10: ffff88811b041aeb R11: 0000000000000001 R12: 0000000000000000 +R13: ffffffffa202d7c0 R14: 0000000000000000 R15: 00000000000147d0 + default_idle_call+0x6b/0xa0 + cpuidle_idle_call+0x1af/0x1f0 + do_idle+0xbc/0x130 + cpu_startup_entry+0x33/0x40 + rest_init+0x11f/0x210 + start_kernel+0x39a/0x420 + x86_64_start_reservations+0x18/0x30 + x86_64_start_kernel+0x97/0xa0 + common_startup_64+0x13e/0x141 + + +Allocated by task 595: + kasan_save_stack+0x24/0x50 + kasan_save_track+0x14/0x30 + __kasan_slab_alloc+0x87/0x90 + kmem_cache_alloc_noprof+0x12b/0x3f0 + copy_net_ns+0x94/0x380 + create_new_namespaces+0x24c/0x500 + unshare_nsproxy_namespaces+0x75/0xf0 + ksys_unshare+0x24e/0x4f0 + __x64_sys_unshare+0x1f/0x30 + do_syscall_64+0x70/0x180 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Freed by task 100: + kasan_save_stack+0x24/0x50 + kasan_save_track+0x14/0x30 + kasan_save_free_info+0x3b/0x60 + __kasan_slab_free+0x54/0x70 + kmem_cache_free+0x156/0x5d0 + cleanup_net+0x5d3/0x670 + process_one_work+0x776/0xa90 + worker_thread+0x2e2/0x560 + kthread+0x1a8/0x1f0 + ret_from_fork+0x34/0x60 + ret_from_fork_asm+0x1a/0x30 + +Reproduction script: + +mkdir -p /mnt/nfsshare +mkdir -p /mnt/nfs/netns_1 +mkfs.ext4 /dev/sdb +mount /dev/sdb /mnt/nfsshare +systemctl restart nfs-server +chmod 777 /mnt/nfsshare +exportfs -i -o rw,no_root_squash *:/mnt/nfsshare + +ip netns add netns_1 +ip link add name veth_1_peer type veth peer veth_1 +ifconfig veth_1_peer 11.11.0.254 up +ip link set veth_1 netns netns_1 +ip netns exec netns_1 ifconfig veth_1 11.11.0.1 + +ip netns exec netns_1 /root/iptables -A OUTPUT -d 11.11.0.254 -p tcp \ + --tcp-flags FIN FIN -j DROP + +(note: In my environment, a DESTROY_CLIENTID operation is always 
sent + immediately, breaking the nfs tcp connection.) +ip netns exec netns_1 timeout -s 9 300 mount -t nfs -o proto=tcp,vers=4.1 \ + 11.11.0.254:/mnt/nfsshare /mnt/nfs/netns_1 + +ip netns del netns_1 + +The reason here is that the tcp socket in netns_1 (nfs side) has been +shutdown and closed (done in xs_destroy), but the FIN message (with ack) +is discarded, and the nfsd side keeps sending retransmission messages. +As a result, when the tcp sock in netns_1 processes the received message, +it sends the message (FIN message) in the sending queue, and the tcp timer +is re-established. When the network namespace is deleted, the net structure +accessed by tcp's timer handler function causes problems. + +To fix this problem, let's hold netns refcnt for the tcp kernel socket as +done in other modules. This is an ugly hack which can easily be backported +to earlier kernels. A proper fix which cleans up the interfaces will +follow, but may not be so easy to backport. + +analysis: 1. **是否与 namespace、cgroup、container 或容器隔离相关**: + 是的,此 CVE 与 namespace 相关。具体来说,问题发生在网络命名空间(netns)中,涉及 NFS 和 TCP 连接在命名空间删除时的资源释放问题。 + +2. **程序漏洞分析**: + - **程序**:这是 Linux 内核(Kernel)中的漏洞。 + - **漏洞发生原因**: + 在使用 `ip netns` 创建的网络命名空间中,NFS 客户端与服务器之间的 TCP 连接被关闭后,FIN 消息被丢弃,而 NFS 服务器端继续发送重传消息。当网络命名空间被删除时,TCP 定时器处理函数仍然尝试访问已被释放的网络结构 (`net`),导致 use-after-free (UAF) 问题。 + - **效果**: + 攻击者可以通过特定的网络配置和操作(如创建和删除网络命名空间)触发此漏洞,可能导致内核崩溃或信息泄露。这可能会影响依赖网络命名空间的容器化环境(如 Docker 或 Kubernetes),因为这些环境通常使用网络命名空间来实现网络隔离。 + +总结:此 CVE 是 Linux 内核中与网络命名空间相关的 use-after-free 漏洞,可能影响容器化环境的稳定性与安全性。 + +cve: ./data/2024/53xxx/CVE-2024-53175.json +In the Linux kernel, the following vulnerability has been resolved: + +ipc: fix memleak if msg_init_ns failed in create_ipc_ns + +Percpu memory allocation may failed during create_ipc_ns however this +fail is not handled properly since ipc sysctls and mq sysctls is not +released properly. Fix this by release these two resource when failure. 
+ +Here is the kmemleak stack when percpu failed: + +unreferenced object 0xffff88819de2a600 (size 512): + comm "shmem_2nstest", pid 120711, jiffies 4300542254 + hex dump (first 32 bytes): + 60 aa 9d 84 ff ff ff ff fc 18 48 b2 84 88 ff ff `.........H..... + 04 00 00 00 a4 01 00 00 20 e4 56 81 ff ff ff ff ........ .V..... + backtrace (crc be7cba35): + [] __kmalloc_node_track_caller_noprof+0x333/0x420 + [] kmemdup_noprof+0x26/0x50 + [] setup_mq_sysctls+0x57/0x1d0 + [] copy_ipcs+0x29c/0x3b0 + [] create_new_namespaces+0x1d0/0x920 + [] copy_namespaces+0x2e9/0x3e0 + [] copy_process+0x29f3/0x7ff0 + [] kernel_clone+0xc0/0x650 + [] __do_sys_clone+0xa1/0xe0 + [] do_syscall_64+0xbf/0x1c0 + [] entry_SYSCALL_64_after_hwframe+0x4b/0x53 + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该漏洞与namespace相关。`create_ipc_ns`函数涉及IPC(Inter-Process Communication)命名空间的创建,而命名空间是Linux容器实现隔离机制的核心组件之一。 + +2. **程序及漏洞分析**: + - **程序**:这是Linux内核(Kernel)的漏洞。 + - **漏洞发生原因**:在`create_ipc_ns`函数中,如果分配percpu内存失败,未正确释放已分配的资源(如ipc sysctls和mq sysctls),导致内存泄漏。 + - **效果**:虽然该漏洞不会直接导致权限提升或系统崩溃,但长期运行可能导致系统内存消耗增加,影响系统稳定性。对于使用大量IPC命名空间的场景(例如容器环境),这种内存泄漏可能被放大,从而对容器化应用的性能和资源管理造成不利影响。 + +cve: ./data/2024/53xxx/CVE-2024-53182.json +In the Linux kernel, the following vulnerability has been resolved: + +Revert "block, bfq: merge bfq_release_process_ref() into bfq_put_cooperator()" + +This reverts commit bc3b1e9e7c50e1de0f573eea3871db61dd4787de. + +The bic is associated with sync_bfqq, and bfq_release_process_ref cannot +be put into bfq_put_cooperator. + +kasan report: +[ 400.347277] ================================================================== +[ 400.347287] BUG: KASAN: slab-use-after-free in bic_set_bfqq+0x200/0x230 +[ 400.347420] Read of size 8 at addr ffff88881cab7d60 by task dockerd/5800 +[ 400.347430] +[ 400.347436] CPU: 24 UID: 0 PID: 5800 Comm: dockerd Kdump: loaded Tainted: G E 6.12.0 #32 +[ 400.347450] Tainted: [E]=UNSIGNED_MODULE +[ 400.347454] Hardware name: VMware, Inc. 
VMware20,1/440BX Desktop Reference Platform, BIOS VMW201.00V.20192059.B64.2207280713 07/28/2022 +[ 400.347460] Call Trace: +[ 400.347464] +[ 400.347468] dump_stack_lvl+0x5d/0x80 +[ 400.347490] print_report+0x174/0x505 +[ 400.347521] kasan_report+0xe0/0x160 +[ 400.347541] bic_set_bfqq+0x200/0x230 +[ 400.347549] bfq_bic_update_cgroup+0x419/0x740 +[ 400.347560] bfq_bio_merge+0x133/0x320 +[ 400.347584] blk_mq_submit_bio+0x1761/0x1e20 +[ 400.347625] __submit_bio+0x28b/0x7b0 +[ 400.347664] submit_bio_noacct_nocheck+0x6b2/0xd30 +[ 400.347690] iomap_readahead+0x50c/0x680 +[ 400.347731] read_pages+0x17f/0x9c0 +[ 400.347785] page_cache_ra_unbounded+0x366/0x4a0 +[ 400.347795] filemap_fault+0x83d/0x2340 +[ 400.347819] __xfs_filemap_fault+0x11a/0x7d0 [xfs] +[ 400.349256] __do_fault+0xf1/0x610 +[ 400.349270] do_fault+0x977/0x11a0 +[ 400.349281] __handle_mm_fault+0x5d1/0x850 +[ 400.349314] handle_mm_fault+0x1f8/0x560 +[ 400.349324] do_user_addr_fault+0x324/0x970 +[ 400.349337] exc_page_fault+0x76/0xf0 +[ 400.349350] asm_exc_page_fault+0x26/0x30 +[ 400.349360] RIP: 0033:0x55a480d77375 +[ 400.349384] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 49 3b 66 10 0f 86 ae 02 00 00 55 48 89 e5 48 83 ec 58 48 8b 10 <83> 7a 10 00 0f 84 27 02 00 00 44 0f b6 42 28 44 0f b6 4a 29 41 80 +[ 400.349392] RSP: 002b:00007f18c37fd8b8 EFLAGS: 00010216 +[ 400.349401] RAX: 00007f18c37fd9d0 RBX: 0000000000000000 RCX: 0000000000000000 +[ 400.349407] RDX: 000055a484407d38 RSI: 000000c000e8b0c0 RDI: 0000000000000000 +[ 400.349412] RBP: 00007f18c37fd910 R08: 000055a484017f60 R09: 000055a484066f80 +[ 400.349417] R10: 0000000000194000 R11: 0000000000000005 R12: 0000000000000008 +[ 400.349422] R13: 0000000000000000 R14: 000000c000476a80 R15: 0000000000000000 +[ 400.349430] +[ 400.349452] +[ 400.349454] Allocated by task 5800: +[ 400.349459] kasan_save_stack+0x30/0x50 +[ 400.349469] kasan_save_track+0x14/0x30 +[ 400.349475] __kasan_slab_alloc+0x89/0x90 +[ 400.349482] 
kmem_cache_alloc_node_noprof+0xdc/0x2a0 +[ 400.349492] bfq_get_queue+0x1ef/0x1100 +[ 400.349502] __bfq_get_bfqq_handle_split+0x11a/0x510 +[ 400.349511] bfq_insert_requests+0xf55/0x9030 +[ 400.349519] blk_mq_flush_plug_list+0x446/0x14c0 +[ 400.349527] __blk_flush_plug+0x27c/0x4e0 +[ 400.349534] blk_finish_plug+0x52/0xa0 +[ 400.349540] _xfs_buf_ioapply+0x739/0xc30 [xfs] +[ 400.350246] __xfs_buf_submit+0x1b2/0x640 [xfs] +[ 400.350967] xfs_buf_read_map+0x306/0xa20 [xfs] +[ 400.351672] xfs_trans_read_buf_map+0x285/0x7d0 [xfs] +[ 400.352386] xfs_imap_to_bp+0x107/0x270 [xfs] +[ 400.353077] xfs_iget+0x70d/0x1eb0 [xfs] +[ 400.353786] xfs_lookup+0x2ca/0x3a0 [xfs] +[ 400.354506] xfs_vn_lookup+0x14e/0x1a0 [xfs] +[ 400.355197] __lookup_slow+0x19c/0x340 +[ 400.355204] lookup_one_unlocked+0xfc/0x120 +[ 400.355211] ovl_lookup_single+0x1b3/0xcf0 [overlay] +[ 400.355255] ovl_lookup_layer+0x316/0x490 [overlay] +[ 400.355295] ovl_lookup+0x844/0x1fd0 [overlay] +[ 400.355351] lookup_one_qstr_excl+0xef/0x150 +[ 400.355357] do_unlinkat+0x22a/0x620 +[ 400.355366] __x64_sys_unlinkat+0x109/0x1e0 +[ 400.355375] do_syscall_64+0x82/0x160 +[ 400.355384] entry_SYSCALL_64_after_hwframe+0x76/0x7 +---truncated--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. **程序漏洞分析**: + - **程序**:Linux Kernel + - **漏洞发生原因**:该漏洞是由于在块设备I/O调度器BFQ(Budget Fair Queuing)中,`bfq_release_process_ref()` 函数被错误地合并到 `bfq_put_cooperator()` 中导致的。这种合并破坏了 `bic`(bfq_io_cq)对象的引用计数管理逻辑,从而引发 use-after-free 问题。具体来说,`bic_set_bfqq` 函数尝试访问已经被释放的 `bic` 对象,导致内核崩溃或不稳定。 + - **效果**:此漏洞可能导致内核出现 slab-use-after-free 错误,进而引发系统崩溃或数据损坏。从日志中可以看出,`dockerd` 进程触发了该漏洞,但这并不意味着漏洞与容器隔离机制直接相关,而是因为 `dockerd` 使用了受影响的 I/O 调度路径。 + +3. 
**结论**:此 CVE 是 Linux 内核中的一个 I/O 调度器漏洞,与 BFQ 的实现细节相关,但不涉及 namespace、cgroup、container 或容器隔离机制。 + +cve: ./data/2024/56xxx/CVE-2024-56635.json +In the Linux kernel, the following vulnerability has been resolved: + +net: avoid potential UAF in default_operstate() + +syzbot reported an UAF in default_operstate() [1] + +Issue is a race between device and netns dismantles. + +After calling __rtnl_unlock() from netdev_run_todo(), +we can not assume the netns of each device is still alive. + +Make sure the device is not in NETREG_UNREGISTERED state, +and add an ASSERT_RTNL() before the call to +__dev_get_by_index(). + +We might move this ASSERT_RTNL() in __dev_get_by_index() +in the future. + +[1] + +BUG: KASAN: slab-use-after-free in __dev_get_by_index+0x5d/0x110 net/core/dev.c:852 +Read of size 8 at addr ffff888043eba1b0 by task syz.0.0/5339 + +CPU: 0 UID: 0 PID: 5339 Comm: syz.0.0 Not tainted 6.12.0-syzkaller-10296-gaaf20f870da0 #0 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 +Call Trace: + + __dump_stack lib/dump_stack.c:94 [inline] + dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 + print_address_description mm/kasan/report.c:378 [inline] + print_report+0x169/0x550 mm/kasan/report.c:489 + kasan_report+0x143/0x180 mm/kasan/report.c:602 + __dev_get_by_index+0x5d/0x110 net/core/dev.c:852 + default_operstate net/core/link_watch.c:51 [inline] + rfc2863_policy+0x224/0x300 net/core/link_watch.c:67 + linkwatch_do_dev+0x3e/0x170 net/core/link_watch.c:170 + netdev_run_todo+0x461/0x1000 net/core/dev.c:10894 + rtnl_unlock net/core/rtnetlink.c:152 [inline] + rtnl_net_unlock include/linux/rtnetlink.h:133 [inline] + rtnl_dellink+0x760/0x8d0 net/core/rtnetlink.c:3520 + rtnetlink_rcv_msg+0x791/0xcf0 net/core/rtnetlink.c:6911 + netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2541 + netlink_unicast_kernel net/netlink/af_netlink.c:1321 [inline] + netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1347 + 
netlink_sendmsg+0x8e4/0xcb0 net/netlink/af_netlink.c:1891 + sock_sendmsg_nosec net/socket.c:711 [inline] + __sock_sendmsg+0x221/0x270 net/socket.c:726 + ____sys_sendmsg+0x52a/0x7e0 net/socket.c:2583 + ___sys_sendmsg net/socket.c:2637 [inline] + __sys_sendmsg+0x269/0x350 net/socket.c:2669 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f +RIP: 0033:0x7f2a3cb80809 +Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007f2a3d9cd058 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f2a3cd45fa0 RCX: 00007f2a3cb80809 +RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000008 +RBP: 00007f2a3cbf393e R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 0000000000000000 R14: 00007f2a3cd45fa0 R15: 00007ffd03bc65c8 + + +Allocated by task 5339: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 + poison_kmalloc_redzone mm/kasan/common.c:377 [inline] + __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:394 + kasan_kmalloc include/linux/kasan.h:260 [inline] + __kmalloc_cache_noprof+0x243/0x390 mm/slub.c:4314 + kmalloc_noprof include/linux/slab.h:901 [inline] + kmalloc_array_noprof include/linux/slab.h:945 [inline] + netdev_create_hash net/core/dev.c:11870 [inline] + netdev_init+0x10c/0x250 net/core/dev.c:11890 + ops_init+0x31e/0x590 net/core/net_namespace.c:138 + setup_net+0x287/0x9e0 net/core/net_namespace.c:362 + copy_net_ns+0x33f/0x570 net/core/net_namespace.c:500 + create_new_namespaces+0x425/0x7b0 kernel/nsproxy.c:110 + unshare_nsproxy_namespaces+0x124/0x180 kernel/nsproxy.c:228 + ksys_unshare+0x57d/0xa70 kernel/fork.c:3314 + __do_sys_unshare kernel/fork.c:3385 [inline] + 
__se_sys_unshare kernel/fork.c:3383 [inline] + __x64_sys_unshare+0x38/0x40 kernel/fork.c:3383 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x8 +---truncated--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,这个漏洞与namespace相关。具体来说,它涉及到网络命名空间(netns)的使用和生命周期管理问题。 + +2. **这是什么程序的漏洞:** + 这是Linux内核(Kernel)中的一个漏洞。漏洞发生在`default_operstate()`函数中,由于竞争条件导致了Use-After-Free(UAF)问题。 + + **漏洞发生原因:** + 在调用`__rtnl_unlock()`之后,代码假设每个设备的网络命名空间(netns)仍然存活,但实际上可能已经被销毁。这会导致在后续调用`__dev_get_by_index()`时访问已被释放的内存。 + + **漏洞效果:** + 此漏洞可能导致内核崩溃或信息泄露。攻击者可能利用此漏洞破坏系统稳定性或进一步提升权限。 + +3. **总结:** + - 漏洞类型:Use-After-Free(UAF)。 + - 相关组件:Linux内核网络子系统,特别是网络命名空间(netns)管理部分。 + - 影响:可能导致内核崩溃或信息泄露,影响系统的稳定性和安全性。 + +cve: ./data/2024/56xxx/CVE-2024-56642.json +In the Linux kernel, the following vulnerability has been resolved: + +tipc: Fix use-after-free of kernel socket in cleanup_bearer(). + +syzkaller reported a use-after-free of UDP kernel socket +in cleanup_bearer() without repro. [0][1] + +When bearer_disable() calls tipc_udp_disable(), cleanup +of the UDP kernel socket is deferred by work calling +cleanup_bearer(). + +tipc_exit_net() waits for such works to finish by checking +tipc_net(net)->wq_count. However, the work decrements the +count too early before releasing the kernel socket, +unblocking cleanup_net() and resulting in use-after-free. + +Let's move the decrement after releasing the socket in +cleanup_bearer(). 
+ +[0]: +ref_tracker: net notrefcnt@000000009b3d1faf has 1/1 users at + sk_alloc+0x438/0x608 + inet_create+0x4c8/0xcb0 + __sock_create+0x350/0x6b8 + sock_create_kern+0x58/0x78 + udp_sock_create4+0x68/0x398 + udp_sock_create+0x88/0xc8 + tipc_udp_enable+0x5e8/0x848 + __tipc_nl_bearer_enable+0x84c/0xed8 + tipc_nl_bearer_enable+0x38/0x60 + genl_family_rcv_msg_doit+0x170/0x248 + genl_rcv_msg+0x400/0x5b0 + netlink_rcv_skb+0x1dc/0x398 + genl_rcv+0x44/0x68 + netlink_unicast+0x678/0x8b0 + netlink_sendmsg+0x5e4/0x898 + ____sys_sendmsg+0x500/0x830 + +[1]: +BUG: KMSAN: use-after-free in udp_hashslot include/net/udp.h:85 [inline] +BUG: KMSAN: use-after-free in udp_lib_unhash+0x3b8/0x930 net/ipv4/udp.c:1979 + udp_hashslot include/net/udp.h:85 [inline] + udp_lib_unhash+0x3b8/0x930 net/ipv4/udp.c:1979 + sk_common_release+0xaf/0x3f0 net/core/sock.c:3820 + inet_release+0x1e0/0x260 net/ipv4/af_inet.c:437 + inet6_release+0x6f/0xd0 net/ipv6/af_inet6.c:489 + __sock_release net/socket.c:658 [inline] + sock_release+0xa0/0x210 net/socket.c:686 + cleanup_bearer+0x42d/0x4c0 net/tipc/udp_media.c:819 + process_one_work kernel/workqueue.c:3229 [inline] + process_scheduled_works+0xcaf/0x1c90 kernel/workqueue.c:3310 + worker_thread+0xf6c/0x1510 kernel/workqueue.c:3391 + kthread+0x531/0x6b0 kernel/kthread.c:389 + ret_from_fork+0x60/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:244 + +Uninit was created at: + slab_free_hook mm/slub.c:2269 [inline] + slab_free mm/slub.c:4580 [inline] + kmem_cache_free+0x207/0xc40 mm/slub.c:4682 + net_free net/core/net_namespace.c:454 [inline] + cleanup_net+0x16f2/0x19d0 net/core/net_namespace.c:647 + process_one_work kernel/workqueue.c:3229 [inline] + process_scheduled_works+0xcaf/0x1c90 kernel/workqueue.c:3310 + worker_thread+0xf6c/0x1510 kernel/workqueue.c:3391 + kthread+0x531/0x6b0 kernel/kthread.c:389 + ret_from_fork+0x60/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:244 + 
+CPU: 0 UID: 0 PID: 54 Comm: kworker/0:2 Not tainted 6.12.0-rc1-00131-gf66ebf37d69c #7 91723d6f74857f70725e1583cba3cf4adc716cfa +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 +Workqueue: events cleanup_bearer + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该 CVE 与网络命名空间(netns)相关:`wq_count` 过早递减会提前解除 `cleanup_net()` 的阻塞,使网络命名空间的 `struct net` 在 UDP 内核套接字释放之前就被销毁。 + +2. **这是什么程序的漏洞**: + 这是Linux内核(Kernel)中的一个漏洞,具体发生在TIPC(Transparent Inter-Process Communication)模块中。漏洞类型为use-after-free,涉及UDP内核套接字在`cleanup_bearer()`函数中的释放问题。 + +3. **漏洞如何发生及其效果**: + - 漏洞发生的原因是:`bearer_disable()`调用`tipc_udp_disable()`时,UDP内核套接字的清理被推迟到执行`cleanup_bearer()`的工作项中。`tipc_exit_net()`通过检查`wq_count`等待这些工作项完成,但工作项在释放内核套接字之前就过早地递减了该计数,使`cleanup_net()`提前解除阻塞并释放网络命名空间;随后释放套接字时会访问已释放的内存,引发use-after-free问题。 + - 效果:此漏洞可能允许攻击者通过触发特定条件导致系统崩溃或潜在的权限提升风险,影响系统的稳定性和安全性。 + +cve: ./data/2024/56xxx/CVE-2024-56644.json +In the Linux kernel, the following vulnerability has been resolved: + +net/ipv6: release expired exception dst cached in socket + +Dst objects get leaked in ip6_negative_advice() when this function is +executed for an expired IPv6 route located in the exception table. There +are several conditions that must be fulfilled for the leak to occur: +* an ICMPv6 packet indicating a change of the MTU for the path is received, + resulting in an exception dst being created +* a TCP connection that uses the exception dst for routing packets must + start timing out so that TCP begins retransmissions +* after the exception dst expires, the FIB6 garbage collector must not run + before TCP executes ip6_negative_advice() for the expired exception dst + +When TCP executes ip6_negative_advice() for an exception dst that has +expired and if no other socket holds a reference to the exception dst, the +refcount of the exception dst is 2, which corresponds to the increment +made by dst_init() and the increment made by the TCP socket for which the +connection is timing out. The refcount made by the socket is never +released. 
The refcount of the dst is decremented in sk_dst_reset() but +that decrement is counteracted by a dst_hold() intentionally placed just +before the sk_dst_reset() in ip6_negative_advice(). After +ip6_negative_advice() has finished, there is no other object tied to the +dst. The socket lost its reference stored in sk_dst_cache and the dst is +no longer in the exception table. The exception dst becomes a leaked +object. + +As a result of this dst leak, an unbalanced refcount is reported for the +loopback device of a net namespace being destroyed under kernels that do +not contain e5f80fcf869a ("ipv6: give an IPv6 dev to blackhole_netdev"): +unregister_netdevice: waiting for lo to become free. Usage count = 2 + +Fix the dst leak by removing the dst_hold() in ip6_negative_advice(). The +patch that introduced the dst_hold() in ip6_negative_advice() was +92f1655aa2b22 ("net: fix __dst_negative_advice() race"). But 92f1655aa2b22 +merely refactored the code with regards to the dst refcount so the issue +was present even before 92f1655aa2b22. The bug was introduced in +54c1a859efd9f ("ipv6: Don't drop cache route entry unless timer actually +expired.") where the expired cached route is deleted and the sk_dst_cache +member of the socket is set to NULL by calling dst_negative_advice() but +the refcount belonging to the socket is left unbalanced. + +The IPv4 version - ipv4_negative_advice() - is not affected by this bug. +When the TCP connection times out ipv4_negative_advice() merely resets the +sk_dst_cache of the socket while decrementing the refcount of the +exception dst. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关** + 该 CVE 涉及到 IPv6 路由缓存对象 (`dst`) 的引用计数泄漏问题,并在销毁网络命名空间 (`net namespace`) 时可能导致未平衡的引用计数。因此,它确实与 `namespace` 相关,尤其是网络命名空间。 + +2. 
**程序漏洞分析** + - **这是什么程序的漏洞**:这是 Linux 内核 (Kernel) 的漏洞。 + - **漏洞如何发生**: + - 当接收到一个 ICMPv6 数据包(指示路径 MTU 发生变化)时,会创建一个异常路由缓存对象 (`exception dst`)。 + - 如果相关的 TCP 连接开始超时并触发重传,且异常路由缓存对象过期后,在 FIB6 垃圾回收器运行之前调用了 `ip6_negative_advice()` 函数,则会发生引用计数泄漏。 + - 具体来说,`ip6_negative_advice()` 在 `sk_dst_reset()` 之前多执行了一次 `dst_hold()`,抵消了 `sk_dst_reset()` 对引用计数的递减,使套接字持有的那份引用永远不会被释放,导致异常路由缓存对象无法被正确释放。 + - **漏洞效果**: + - 泄漏的路由缓存对象会导致内存泄漏。 + - 在销毁网络命名空间时,可能会报告未平衡的引用计数问题(例如,回环设备 `lo` 的使用计数为 2,导致等待其释放)。这可能阻碍网络命名空间的正常销毁,从而影响依赖命名空间的功能(如容器网络)。 + +cve: ./data/2024/56xxx/CVE-2024-56658.json +In the Linux kernel, the following vulnerability has been resolved: + +net: defer final 'struct net' free in netns dismantle + +Ilya reported a slab-use-after-free in dst_destroy [1] + +Issue is in xfrm6_net_init() and xfrm4_net_init() : + +They copy xfrm[46]_dst_ops_template into net->xfrm.xfrm[46]_dst_ops. + +But net structure might be freed before all the dst callbacks are +called. So when dst_destroy() calls later : + +if (dst->ops->destroy) + dst->ops->destroy(dst); + +dst->ops points to the old net->xfrm.xfrm[46]_dst_ops, which has been freed. + +See a relevant issue fixed in : + +ac888d58869b ("net: do not delay dst_entries_add() in dst_release()") + +A fix is to queue the 'struct net' to be freed after one +another cleanup_net() round (and existing rcu_barrier()) + +[1] + +BUG: KASAN: slab-use-after-free in dst_destroy (net/core/dst.c:112) +Read of size 8 at addr ffff8882137ccab0 by task swapper/37/0 +Dec 03 05:46:18 kernel: +CPU: 37 UID: 0 PID: 0 Comm: swapper/37 Kdump: loaded Not tainted 6.12.0 #67 +Hardware name: Red Hat KVM/RHEL, BIOS 1.16.1-1.el9 04/01/2014 +Call Trace: + +dump_stack_lvl (lib/dump_stack.c:124) +print_address_description.constprop.0 (mm/kasan/report.c:378) +? dst_destroy (net/core/dst.c:112) +print_report (mm/kasan/report.c:489) +? dst_destroy (net/core/dst.c:112) +? kasan_addr_to_slab (mm/kasan/common.c:37) +kasan_report (mm/kasan/report.c:603) +? dst_destroy (net/core/dst.c:112) +? 
rcu_do_batch (kernel/rcu/tree.c:2567) +dst_destroy (net/core/dst.c:112) +rcu_do_batch (kernel/rcu/tree.c:2567) +? __pfx_rcu_do_batch (kernel/rcu/tree.c:2491) +? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4339 kernel/locking/lockdep.c:4406) +rcu_core (kernel/rcu/tree.c:2825) +handle_softirqs (kernel/softirq.c:554) +__irq_exit_rcu (kernel/softirq.c:589 kernel/softirq.c:428 kernel/softirq.c:637) +irq_exit_rcu (kernel/softirq.c:651) +sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1049 arch/x86/kernel/apic/apic.c:1049) + + +asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:702) +RIP: 0010:default_idle (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:92 arch/x86/kernel/process.c:743) +Code: 00 4d 29 c8 4c 01 c7 4c 29 c2 e9 6e ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 0f 00 2d c7 c9 27 00 fb f4 c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 +RSP: 0018:ffff888100d2fe00 EFLAGS: 00000246 +RAX: 00000000001870ed RBX: 1ffff110201a5fc2 RCX: ffffffffb61a3e46 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffb3d4d123 +RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed11c7e1835d +R10: ffff888e3f0c1aeb R11: 0000000000000000 R12: 0000000000000000 +R13: ffff888100d20000 R14: dffffc0000000000 R15: 0000000000000000 +? ct_kernel_exit.constprop.0 (kernel/context_tracking.c:148) +? cpuidle_idle_call (kernel/sched/idle.c:186) +default_idle_call (./include/linux/cpuidle.h:143 kernel/sched/idle.c:118) +cpuidle_idle_call (kernel/sched/idle.c:186) +? __pfx_cpuidle_idle_call (kernel/sched/idle.c:168) +? lock_release (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5848) +? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4347 kernel/locking/lockdep.c:4406) +? 
tsc_verify_tsc_adjust (arch/x86/kernel/tsc_sync.c:59) +do_idle (kernel/sched/idle.c:326) +cpu_startup_entry (kernel/sched/idle.c:423 (discriminator 1)) +start_secondary (arch/x86/kernel/smpboot.c:202 arch/x86/kernel/smpboot.c:282) +? __pfx_start_secondary (arch/x86/kernel/smpboot.c:232) +? soft_restart_cpu (arch/x86/kernel/head_64.S:452) +common_startup_64 (arch/x86/kernel/head_64.S:414) + +Dec 03 05:46:18 kernel: +Allocated by task 12184: +kasan_save_stack (mm/kasan/common.c:48) +kasan_save_track (./arch/x86/include/asm/current.h:49 mm/kasan/common.c:60 mm/kasan/common.c:69) +__kasan_slab_alloc (mm/kasan/common.c:319 mm/kasan/common.c:345) +kmem_cache_alloc_noprof (mm/slub.c:4085 mm/slub.c:4134 mm/slub.c:4141) +copy_net_ns (net/core/net_namespace.c:421 net/core/net_namespace.c:480) +create_new_namespaces +---truncated--- + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与 namespace 相关。具体来说,它涉及网络命名空间(netns)的实现,其中 `struct net` 的释放时机不当,导致在 dst_destroy 调用时访问已释放的内存。 + +2. **程序漏洞分析**: + - **程序**:这是 Linux 内核(Kernel)中的漏洞。 + - **漏洞发生原因**:在初始化网络命名空间时,`xfrm6_net_init()` 和 `xfrm4_net_init()` 将 `xfrm[46]_dst_ops_template` 复制到 `net->xfrm.xfrm[46]_dst_ops`。然而,如果 `net` 结构在所有 dst 回调被调用之前就被释放了,那么当 `dst_destroy()` 被调用时,`dst->ops` 指向的内存已经被释放,从而导致 slab-use-after-free。 + - **效果**:此漏洞可能导致内核崩溃或信息泄露,攻击者可能利用此漏洞破坏系统的稳定性或进一步提升权限。 + +总结:该 CVE 与网络命名空间(netns)相关,是 Linux 内核中的一个 slab-use-after-free 漏洞,可能导致系统不稳定或被利用进行提权攻击。 + +cve: ./data/2024/56xxx/CVE-2024-56672.json +In the Linux kernel, the following vulnerability has been resolved: + +blk-cgroup: Fix UAF in blkcg_unpin_online() + +blkcg_unpin_online() walks up the blkcg hierarchy putting the online pin. 
To +walk up, it uses blkcg_parent(blkcg) but it was calling that after +blkcg_destroy_blkgs(blkcg) which could free the blkcg, leading to the +following UAF: + + ================================================================== + BUG: KASAN: slab-use-after-free in blkcg_unpin_online+0x15a/0x270 + Read of size 8 at addr ffff8881057678c0 by task kworker/9:1/117 + + CPU: 9 UID: 0 PID: 117 Comm: kworker/9:1 Not tainted 6.13.0-rc1-work-00182-gb8f52214c61a-dirty #48 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS unknown 02/02/2022 + Workqueue: cgwb_release cgwb_release_workfn + Call Trace: + + dump_stack_lvl+0x27/0x80 + print_report+0x151/0x710 + kasan_report+0xc0/0x100 + blkcg_unpin_online+0x15a/0x270 + cgwb_release_workfn+0x194/0x480 + process_scheduled_works+0x71b/0xe20 + worker_thread+0x82a/0xbd0 + kthread+0x242/0x2c0 + ret_from_fork+0x33/0x70 + ret_from_fork_asm+0x1a/0x30 + + ... + Freed by task 1944: + kasan_save_track+0x2b/0x70 + kasan_save_free_info+0x3c/0x50 + __kasan_slab_free+0x33/0x50 + kfree+0x10c/0x330 + css_free_rwork_fn+0xe6/0xb30 + process_scheduled_works+0x71b/0xe20 + worker_thread+0x82a/0xbd0 + kthread+0x242/0x2c0 + ret_from_fork+0x33/0x70 + ret_from_fork_asm+0x1a/0x30 + +Note that the UAF is not easy to trigger as the free path is indirected +behind a couple RCU grace periods and a work item execution. I could only +trigger it with artifical msleep() injected in blkcg_unpin_online(). + +Fix it by reading the parent pointer before destroying the blkcg's blkg's. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,这个 CVE 与 cgroup(控制组)直接相关。它涉及 `blk-cgroup` 的实现,这是 Linux 内核中用于管理块设备 I/O 资源分配和限制的一部分。 + +2. 
**这是什么程序的漏洞,如何发生,有何效果**: + - 这是 **Linux 内核** 的漏洞,具体发生在 `blk-cgroup` 子系统中。 + - 漏洞原因是 `blkcg_unpin_online()` 函数在调用 `blkcg_destroy_blkgs(blkcg)` 后使用了 `blkcg_parent(blkcg)`,而 `blkcg_destroy_blkgs()` 可能已经释放了 `blkcg` 对象,导致出现 Use-After-Free (UAF) 问题。 + - 效果:此漏洞可能导致内核崩溃(如 KASAN 报告所示),从而影响系统的稳定性。攻击者可能利用该漏洞触发拒绝服务 (DoS) 或进一步提升权限。 + +总结:这是一个与 cgroup 相关的 Linux 内核漏洞,可能导致 UAF 并引发系统崩溃或潜在的安全风险。 + +cve: ./data/2024/56xxx/CVE-2024-56783.json +In the Linux kernel, the following vulnerability has been resolved: + +netfilter: nft_socket: remove WARN_ON_ONCE on maximum cgroup level + +cgroup maximum depth is INT_MAX by default, there is a cgroup toggle to +restrict this maximum depth to a more reasonable value not to harm +performance. Remove unnecessary WARN_ON_ONCE which is reachable from +userspace. + +analysis: 1. 该CVE信息与cgroup相关。 +2. 这是Linux内核的漏洞。漏洞发生在netfilter子系统的nft_socket模块中,当处理cgroup的最大层级时,存在一个不必要的WARN_ON_ONCE调用,该调用可以从用户空间触发,可能导致性能问题或警告信息暴露。移除这个不必要的WARN_ON_ONCE可以避免潜在的性能影响和不必要的警告输出。 + +cve: ./data/2024/57xxx/CVE-2024-57974.json +In the Linux kernel, the following vulnerability has been resolved: + +udp: Deal with race between UDP socket address change and rehash + +If a UDP socket changes its local address while it's receiving +datagrams, as a result of connect(), there is a period during which +a lookup operation might fail to find it, after the address is changed +but before the secondary hash (port and address) and the four-tuple +hash (local and remote ports and addresses) are updated. + +Secondary hash chains were introduced by commit 30fff9231fad ("udp: +bind() optimisation") and, as a result, a rehash operation became +needed to make a bound socket reachable again after a connect(). + +This operation was introduced by commit 719f835853a9 ("udp: add +rehash on connect()") which isn't however a complete fix: the +socket will be found once the rehashing completes, but not while +it's pending. 
+ +This is noticeable with a socat(1) server in UDP4-LISTEN mode, and a +client sending datagrams to it. After the server receives the first +datagram (cf. _xioopen_ipdgram_listen()), it issues a connect() to +the address of the sender, in order to set up a directed flow. + +Now, if the client, running on a different CPU thread, happens to +send a (subsequent) datagram while the server's socket changes its +address, but is not rehashed yet, this will result in a failed +lookup and a port unreachable error delivered to the client, as +apparent from the following reproducer: + + LEN=$(($(cat /proc/sys/net/core/wmem_default) / 4)) + dd if=/dev/urandom bs=1 count=${LEN} of=tmp.in + + while :; do + taskset -c 1 socat UDP4-LISTEN:1337,null-eof OPEN:tmp.out,create,trunc & + sleep 0.1 || sleep 1 + taskset -c 2 socat OPEN:tmp.in UDP4:localhost:1337,shut-null + wait + done + +where the client will eventually get ECONNREFUSED on a write() +(typically the second or third one of a given iteration): + + 2024/11/13 21:28:23 socat[46901] E write(6, 0x556db2e3c000, 8192): Connection refused + +This issue was first observed as a seldom failure in Podman's tests +checking UDP functionality while using pasta(1) to connect the +container's network namespace, which leads us to a reproducer with +the lookup error resulting in an ICMP packet on a tap device: + + LOCAL_ADDR="$(ip -j -4 addr show|jq -rM '.[] | .addr_info[0] | select(.scope == "global").local')" + + while :; do + ./pasta --config-net -p pasta.pcap -u 1337 socat UDP4-LISTEN:1337,null-eof OPEN:tmp.out,create,trunc & + sleep 0.2 || sleep 1 + socat OPEN:tmp.in UDP4:${LOCAL_ADDR}:1337,shut-null + wait + cmp tmp.in tmp.out + done + +Once this fails: + + tmp.in tmp.out differ: char 8193, line 29 + +we can finally have a look at what's going on: + + $ tshark -r pasta.pcap + 1 0.000000 :: ? ff02::16 ICMPv6 110 Multicast Listener Report Message v2 + 2 0.168690 88.198.0.161 ? 88.198.0.164 UDP 8234 60260 ? 
1337 Len=8192 + 3 0.168767 88.198.0.161 ? 88.198.0.164 UDP 8234 60260 ? 1337 Len=8192 + 4 0.168806 88.198.0.161 ? 88.198.0.164 UDP 8234 60260 ? 1337 Len=8192 + 5 0.168827 c6:47:05:8d:dc:04 ? Broadcast ARP 42 Who has 88.198.0.161? Tell 88.198.0.164 + 6 0.168851 9a:55:9a:55:9a:55 ? c6:47:05:8d:dc:04 ARP 42 88.198.0.161 is at 9a:55:9a:55:9a:55 + 7 0.168875 88.198.0.161 ? 88.198.0.164 UDP 8234 60260 ? 1337 Len=8192 + 8 0.168896 88.198.0.164 ? 88.198.0.161 ICMP 590 Destination unreachable (Port unreachable) + 9 0.168926 88.198.0.161 ? 88.198.0.164 UDP 8234 60260 ? 1337 Len=8192 + 10 0.168959 88.198.0.161 ? 88.198.0.164 UDP 8234 60260 ? 1337 Len=8192 + 11 0.168989 88.198.0.161 ? 88.198.0.164 UDP 4138 60260 ? 1337 Len=4096 + 12 0.169010 88.198.0.161 ? 88.198.0.164 UDP 42 60260 ? 1337 Len=0 + +On the third datagram received, the network namespace of the container +initiates an ARP lookup to deliver the ICMP message. + +In another variant of this reproducer, starting the client with: + + strace -f pasta --config-net -u 1337 socat UDP4-LISTEN:1337,null-eof OPEN:tmp.out,create,tru +---truncated--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个 CVE 信息与网络命名空间(network namespace)和容器相关。问题在 Podman 测试中被观察到,并且涉及容器网络命名空间中的 UDP 行为。 + +2. **程序漏洞分析**: + - **程序**:这是 Linux 内核(Kernel)中的漏洞。 + - **漏洞发生原因**:当一个 UDP 套接字通过 `connect()` 改变其本地地址时,在更新二级哈希(port 和 address)以及四元组哈希(local 和 remote ports 和 addresses)之前,存在一个时间窗口,在此期间查找操作可能会失败。这导致客户端收到 "Port unreachable" 错误。 + - **效果**:该漏洞会导致在特定条件下(如多线程环境下的 UDP 数据包发送),客户端可能无法正确接收服务器响应,从而引发连接被拒绝(ECONNREFUSED)或端口不可达(ICMP Port Unreachable)的问题。这对依赖 UDP 的应用(例如容器内的网络通信)可能造成间歇性故障。 + +总结:这是一个 Linux 内核中的 UDP 套接字实现漏洞,与网络命名空间和容器网络行为相关。 + +cve: ./data/2024/57xxx/CVE-2024-57977.json +In the Linux kernel, the following vulnerability has been resolved: + +memcg: fix soft lockup in the OOM process + +A soft lockup issue was found in the product with about 56,000 tasks were +in the OOM cgroup, it was traversing them when the soft lockup was +triggered. 
+ +watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [VM Thread:1503066] +CPU: 2 PID: 1503066 Comm: VM Thread Kdump: loaded Tainted: G +Hardware name: Huawei Cloud OpenStack Nova, BIOS +RIP: 0010:console_unlock+0x343/0x540 +RSP: 0000:ffffb751447db9a0 EFLAGS: 00000247 ORIG_RAX: ffffffffffffff13 +RAX: 0000000000000001 RBX: 0000000000000000 RCX: 00000000ffffffff +RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000247 +RBP: ffffffffafc71f90 R08: 0000000000000000 R09: 0000000000000040 +R10: 0000000000000080 R11: 0000000000000000 R12: ffffffffafc74bd0 +R13: ffffffffaf60a220 R14: 0000000000000247 R15: 0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f2fe6ad91f0 CR3: 00000004b2076003 CR4: 0000000000360ee0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + vprintk_emit+0x193/0x280 + printk+0x52/0x6e + dump_task+0x114/0x130 + mem_cgroup_scan_tasks+0x76/0x100 + dump_header+0x1fe/0x210 + oom_kill_process+0xd1/0x100 + out_of_memory+0x125/0x570 + mem_cgroup_out_of_memory+0xb5/0xd0 + try_charge+0x720/0x770 + mem_cgroup_try_charge+0x86/0x180 + mem_cgroup_try_charge_delay+0x1c/0x40 + do_anonymous_page+0xb5/0x390 + handle_mm_fault+0xc4/0x1f0 + +This is because thousands of processes are in the OOM cgroup, and it takes a +long time to traverse all of them. As a result, this leads to a soft lockup +in the OOM process. + +To fix this issue, call 'cond_resched' in the 'mem_cgroup_scan_tasks' +function per 1000 iterations. For global OOM, call +'touch_softlockup_watchdog' per 1000 iterations to avoid this issue. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,该CVE与cgroup相关。具体来说,问题出现在处理OOM(Out-of-Memory)时对cgroup中任务的遍历。 + +2. 
**程序漏洞分析**: + - **程序**:这是Linux内核(Kernel)中的漏洞。 + - **漏洞发生原因**:当大量任务(约56,000个)位于同一个OOM cgroup中时,内核在遍历这些任务以决定OOM killer的目标时,由于任务数量过多,导致遍历过程耗时过长。这使得CPU长时间被占用而无法响应其他任务,最终触发了软锁定(soft lockup)。 + - **效果**:软锁定会导致特定CPU核心在一段时间内无法响应其他中断或任务调度,可能影响系统的稳定性和响应能力。在极端情况下,可能导致系统部分功能失效或需要重启。 + +总结:该CVE是一个与cgroup相关的Linux内核漏洞,主要影响OOM处理逻辑,可能导致软锁定问题。 + +cve: ./data/2024/58xxx/CVE-2024-58088.json +In the Linux kernel, the following vulnerability has been resolved: + +bpf: Fix deadlock when freeing cgroup storage + +The following commit +bc235cdb423a ("bpf: Prevent deadlock from recursive bpf_task_storage_[get|delete]") +first introduced deadlock prevention for fentry/fexit programs attaching +on bpf_task_storage helpers. That commit also employed the logic in map +free path in its v6 version. + +Later bpf_cgrp_storage was first introduced in +c4bcfb38a95e ("bpf: Implement cgroup storage available to non-cgroup-attached bpf progs") +which faces the same issue as bpf_task_storage, instead of its busy +counter, NULL was passed to bpf_local_storage_map_free() which opened +a window to cause deadlock: + + + (acquiring local_storage->lock) + _raw_spin_lock_irqsave+0x3d/0x50 + bpf_local_storage_update+0xd1/0x460 + bpf_cgrp_storage_get+0x109/0x130 + bpf_prog_a4d4a370ba857314_cgrp_ptr+0x139/0x170 + ? __bpf_prog_enter_recur+0x16/0x80 + bpf_trampoline_6442485186+0x43/0xa4 + cgroup_storage_ptr+0x9/0x20 + (holding local_storage->lock) + bpf_selem_unlink_storage_nolock.constprop.0+0x135/0x160 + bpf_selem_unlink_storage+0x6f/0x110 + bpf_local_storage_map_free+0xa2/0x110 + bpf_map_free_deferred+0x5b/0x90 + process_one_work+0x17c/0x390 + worker_thread+0x251/0x360 + kthread+0xd2/0x100 + ret_from_fork+0x34/0x50 + ret_from_fork_asm+0x1a/0x30 + + +Progs: + - A: SEC("fentry/cgroup_storage_ptr") + - cgid (BPF_MAP_TYPE_HASH) + Record the id of the cgroup the current task belonging + to in this hash map, using the address of the cgroup + as the map key. 
+ - cgrpa (BPF_MAP_TYPE_CGRP_STORAGE) + If current task is a kworker, lookup the above hash + map using function parameter @owner as the key to get + its corresponding cgroup id which is then used to get + a trusted pointer to the cgroup through + bpf_cgroup_from_id(). This trusted pointer can then + be passed to bpf_cgrp_storage_get() to finally trigger + the deadlock issue. + - B: SEC("tp_btf/sys_enter") + - cgrpb (BPF_MAP_TYPE_CGRP_STORAGE) + The only purpose of this prog is to fill Prog A's + hash map by calling bpf_cgrp_storage_get() for as + many userspace tasks as possible. + +Steps to reproduce: + - Run A; + - while (true) { Run B; Destroy B; } + +Fix this issue by passing its busy counter to the free procedure so +it can be properly incremented before storage/smap locking. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,该CVE与cgroup和BPF程序相关。具体涉及`bpf_cgrp_storage`和`BPF_MAP_TYPE_CGRP_STORAGE`,这些组件与cgroup紧密关联,用于在BPF程序中实现与cgroup相关的存储功能。 + +2. **这是什么程序的漏洞:** + - 这是**Linux内核**中的漏洞,具体发生在BPF(Berkeley Packet Filter)子系统中。 + - **漏洞发生原因:** + 在处理`bpf_cgrp_storage`时,由于未正确传递`busy counter`到释放路径,导致在特定条件下可能发生死锁。问题的核心在于调用`bpf_local_storage_map_free()`函数时传入了`NULL`而不是`busy counter`,从而打开了一扇可能导致死锁的窗口。 + - **效果:** + 该漏洞可能导致系统在特定情况下进入死锁状态,影响系统的稳定性和可用性。尤其是当BPF程序尝试访问或释放cgroup存储时,可能会导致关键线程无法继续执行。 + +3. **总结:** + 该CVE明确与cgroup和BPF程序相关,属于Linux内核的安全问题,可能对依赖BPF和cgroup的容器运行时(如Docker、Kubernetes等)产生间接影响,尤其是在使用BPF进行资源监控或隔离时。 + +cve: ./data/2024/5xxx/CVE-2024-5652.json +Docker Desktop on Windows before v4.31.0 allows a user in the docker-users group to cause a Windows Denial-of-Service through the exec-path Docker daemon config option in Windows containers mode. + +analysis: 1. 该 CVE 信息与 container 和 Docker 相关。 + +2. 
这是 Docker Desktop 的漏洞。 + - 漏洞发生的原因:在 Windows 上,Docker Desktop 允许通过 `exec-path` 配置选项来指定 Docker 守护进程的行为。如果用户属于 `docker-users` 组,并且 Docker 处于 Windows 容器模式,攻击者可以滥用此配置选项。 + - 漏洞效果:攻击者可以通过恶意配置 `exec-path` 导致 Docker 守护进程崩溃,从而造成 Windows 系统上的拒绝服务(Denial-of-Service, DoS)。 + +cve: ./data/2024/6xxx/CVE-2024-6222.json +In Docker Desktop before v4.29.0, an attacker who has gained access to the Docker Desktop VM through a container breakout can further escape to the host by passing extensions and dashboard related IPC messages. + +Docker Desktop v4.29.0 https://docs.docker.com/desktop/release-notes/#4290 fixes the issue on MacOS, Linux and Windows with Hyper-V backend. + +As exploitation requires "Allow only extensions distributed through the Docker Marketplace" to be disabled, Docker Desktop  v4.31.0 https://docs.docker.com/desktop/release-notes/#4310  additionally changes the default configuration to enable this setting by default. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与容器和隔离相关。它涉及 Docker Desktop 的容器 breakout 问题,并进一步影响到主机的隔离性。 + +2. **程序漏洞分析**: + - **程序**:这是 Docker Desktop 的漏洞,而不是内核 Kernel 或容器内部运行的应用。 + - **漏洞发生方式**:攻击者需要首先通过某种方式实现容器 breakout(逃离容器),然后利用 Docker Desktop 的 IPC(进程间通信)机制,通过扩展(extensions)和仪表板相关的消息传递功能,进一步从 Docker Desktop 的虚拟机环境中逃逸到主机。 + - **效果**:此漏洞允许攻击者在已经突破容器隔离的基础上,进一步突破 Docker Desktop 虚拟机的隔离,直接访问和控制宿主机系统,从而可能导致宿主机被完全 compromise(攻陷)。 + +总结:Docker Desktop 的漏洞,与容器和隔离相关,攻击者可通过容器 breakout 和 IPC 消息传递机制从虚拟机逃逸到宿主机。 + +cve: ./data/2024/7xxx/CVE-2024-7387.json +A flaw was found in openshift/builder. This vulnerability allows command injection via path traversal, where a malicious user can execute arbitrary commands on the OpenShift node running the builder container. When using the “Docker” strategy, executable files inside the privileged build container can be overridden using the `spec.source.secrets.secret.destinationDir` attribute of the `BuildConfig` definition. 
An attacker running code in a privileged container could escalate their permissions on the node running the container. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是OpenShift Builder组件的漏洞,具体涉及容器实现。该漏洞发生在使用“Docker”构建策略时,攻击者可以通过`spec.source.secrets.secret.destinationDir`属性覆盖特权构建容器中的可执行文件,从而实现命令注入和路径遍历攻击。效果是恶意用户可以在运行构建器容器的OpenShift节点上执行任意命令,进而利用特权容器逃逸到宿主节点,导致权限提升和对宿主系统的完全控制。 + +cve: ./data/2024/7xxx/CVE-2024-7557.json +A vulnerability was found in OpenShift AI that allows for authentication bypass and privilege escalation across models within the same namespace. When deploying AI models, the UI provides the option to protect models with authentication. However, credentials from one model can be used to access other models and APIs within the same namespace. The exposed ServiceAccount tokens, visible in the UI, can be utilized with oc --token={token} to exploit the elevated view privileges associated with the ServiceAccount, leading to unauthorized access to additional resources. + +analysis: 1. 该CVE信息与namespace相关。 +2. 这是OpenShift AI的漏洞。漏洞发生的原因是UI提供的模型保护机制存在缺陷,导致在一个namespace内的模型和API的认证可以被其他模型的凭据绕过。效果是攻击者可以利用暴露的ServiceAccount令牌,通过`oc --token={token}`命令获取更高权限,进而访问和操控同一namespace内的其他资源,造成跨模型的认证绕过和权限提升。 + +cve: ./data/2024/7xxx/CVE-2024-7598.json +A security issue was discovered in Kubernetes where a malicious or compromised pod could bypass network restrictions enforced by network policies during namespace deletion. The order in which objects are deleted during namespace termination is not defined, and it is possible for network policies to be deleted before the pods that they protect. This can lead to a brief period in which the pods are running, but network policies that should apply to connections to and from the pods are not enforced. + +analysis: 1. 这个CVE信息与namespace相关。 + +2. 
这是Kubernetes的漏洞。漏洞发生在Kubernetes在删除namespace时,由于对象删除顺序未定义,可能导致网络策略(Network Policies)在网络策略保护的Pods之前被删除。这会导致在Pod仍然运行期间,本应适用的网络策略未被强制执行,从而允许恶意或受感染的Pod绕过网络限制。其效果是破坏了网络隔离,使得Pod能够在没有适当网络策略限制的情况下进行通信。 + +cve: ./data/2024/7xxx/CVE-2024-7771.json +A vulnerability in the Dockerized version of mintplex-labs/anything-llm (latest, digest 1d9452da2b92) allows for a denial of service. Uploading an audio file with a very low sample rate causes the functionality responsible for transcribing it to crash the entire site instance. The issue arises from the localWhisper implementation, where resampling the audio file from 1 Hz to 16000 Hz quickly exceeds available memory, leading to the Docker instance being killed by the instance manager. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与容器相关。问题发生在Docker化的应用程序中,并且由于内存耗尽导致Docker实例被杀死,这涉及到容器资源限制和隔离机制。 + +2. **程序漏洞分析**: + - **程序类型**:这是容器内部运行的应用程序的漏洞,具体是 `mintplex-labs/anything-llm` 中的 `localWhisper` 功能模块。 + - **漏洞发生原因**:当用户上传一个采样率非常低(例如1 Hz)的音频文件时,`localWhisper` 模块尝试将其重新采样到16000 Hz。这一过程需要大量内存,最终导致内存耗尽。 + - **漏洞效果**:由于内存不足,Docker容器被容器管理器(如Docker守护进程或cgroup机制)杀死,从而引发整个站点实例的拒绝服务(DoS)。 + +总结:这是一个容器内部运行的应用程序漏洞,与容器的资源限制和隔离机制相关。 + +cve: ./data/2024/8xxx/CVE-2024-8037.json +Vulnerable juju hook tool abstract UNIX domain socket. When combined with an attack of JUJU_CONTEXT_ID, any user on the local system with access to the default network namespace may connect to the @/var/lib/juju/agents/unit-xxxx-yyyy/agent.socket and perform actions that are normally reserved to a juju charm. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,该CVE与namespace相关。描述中提到“default network namespace”,表明问题涉及网络命名空间(network namespace),这是Linux命名空间的一种,用于隔离网络资源。 + +2. 
**程序漏洞分析:** + - **程序:** 这是Juju hook工具的漏洞。 + - **漏洞发生原因:** 由于Juju hook工具使用了抽象UNIX域套接字(abstract UNIX domain socket),在结合针对JUJU_CONTEXT_ID的攻击时,任何对默认网络命名空间有访问权限的本地用户,都可以连接到`@/var/lib/juju/agents/unit-xxxx-yyyy/agent.socket`。 + - **效果:** 攻击者可以执行通常仅限于Juju charm的操作。这可能导致权限提升或未授权的操作执行,破坏Juju环境的安全性。 + +总结:该CVE与默认网络命名空间相关,允许本地用户绕过预期的隔离机制,从而对Juju charm进行未授权操作。 + +cve: ./data/2024/8xxx/CVE-2024-8038.json +Vulnerable juju introspection abstract UNIX domain socket. An abstract UNIX domain socket responsible for introspection is available without authentication locally to network namespace users. This enables denial of service attacks. + +analysis: 1. 该 CVE 信息与 namespace、cgroup、container 或者容器、隔离相关。 + +2. 这是 Juju 程序的漏洞。Juju 是一个用于应用程序建模和部署的工具,通常在容器或虚拟化环境中使用。该漏洞发生在 Juju 的 introspection 功能中,具体是因为一个抽象的 UNIX 域套接字(abstract UNIX domain socket)未进行身份验证,并且对网络命名空间(network namespace)中的用户开放。这使得攻击者可以在本地利用此漏洞发起拒绝服务(Denial of Service, DoS)攻击,可能导致 Juju 的相关功能不可用或中断。 + +cve: ./data/2024/8xxx/CVE-2024-8060.json +OpenWebUI version 0.3.0 contains a vulnerability in the audio API endpoint `/audio/api/v1/transcriptions` that allows for arbitrary file upload. The application performs insufficient validation on the `file.content_type` and allows user-controlled filenames, leading to a path traversal vulnerability. This can be exploited by an authenticated user to overwrite critical files within the Docker container, potentially leading to remote code execution as the root user. + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是容器内部运行的应用程序(OpenWebUI)的漏洞。漏洞发生的原因是应用对`file.content_type`的验证不足,并允许用户控制的文件名,从而导致路径遍历漏洞。其效果是,经过身份验证的攻击者可以利用该漏洞覆盖Docker容器中的关键文件,进而可能实现以root用户身份进行远程代码执行。 + +cve: ./data/2024/8xxx/CVE-2024-8695.json +A remote code execution (RCE) vulnerability via crafted extension description/changelog could be abused by a malicious extension in Docker Desktop before 4.34.2. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 
这是Docker Desktop的漏洞。该漏洞是由于Docker Desktop在处理扩展描述或变更日志时,未能正确验证或限制恶意扩展的行为,导致恶意扩展可以通过构造特定的扩展描述或变更日志实现远程代码执行(RCE)。此漏洞的效果是攻击者可以利用恶意扩展在目标系统上执行任意代码,可能突破容器的隔离机制,进而影响宿主机的安全。 + +cve: ./data/2024/8xxx/CVE-2024-8696.json +A remote code execution (RCE) vulnerability via crafted extension publisher-url/additional-urls could be abused by a malicious extension in Docker Desktop before 4.34.2. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 + +2. 这是Docker Desktop程序的漏洞。该漏洞发生在Docker Desktop的扩展机制中,恶意扩展可以通过精心构造的`publisher-url`或`additional-urls`字段触发远程代码执行(RCE)。其效果是允许攻击者在受影响的Docker Desktop主机上以运行Docker Desktop进程的权限执行任意代码,从而破坏容器隔离性并可能进一步危害整个系统。 + +cve: ./data/2024/9xxx/CVE-2024-9407.json +A vulnerability exists in the bind-propagation option of the Dockerfile RUN --mount instruction. The system does not properly validate the input passed to this option, allowing users to pass arbitrary parameters to the mount instruction. This issue can be exploited to mount sensitive directories from the host into a container during the build process and, in some cases, modify the contents of those mounted files. Even if SELinux is used, this vulnerability can bypass its protection by allowing the source directory to be relabeled to give the container access to host files. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是Docker的漏洞。漏洞发生在Dockerfile的`RUN --mount`指令的bind-propagation选项中,由于系统未正确验证传递给该选项的输入,导致用户可以传递任意参数到mount指令。此漏洞的效果是攻击者可以在构建过程中将主机上的敏感目录挂载到容器中,并在某些情况下修改这些挂载文件的内容。即使启用了SELinux,该漏洞也可以通过重新标记源目录来绕过其保护,从而使容器能够访问主机文件。 + +cve: ./data/2024/9xxx/CVE-2024-9676.json +A vulnerability was found in Podman, Buildah, and CRI-O. A symlink traversal vulnerability in the containers/storage library can cause Podman, Buildah, and CRI-O to hang and result in a denial of service via OOM kill when running a malicious image using an automatically assigned user namespace (`--userns=auto` in Podman and Buildah). 
The containers/storage library will read /etc/passwd inside the container, but does not properly validate if that file is a symlink, which can be used to cause the library to read an arbitrary file on the host. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,该CVE与容器和用户命名空间(user namespace)相关。 + +2. **程序漏洞分析**: + - **程序**:这是容器实现程序 Podman、Buildah 和 CRI-O 的漏洞,具体问题出在其依赖的 `containers/storage` 库中。 + - **漏洞发生原因**:当运行恶意容器镜像时,如果使用了自动分配的用户命名空间(`--userns=auto`),`containers/storage` 库会读取容器内的 `/etc/passwd` 文件。然而,该库未能正确验证该文件是否为符号链接(symlink),这使得攻击者可以构造一个指向主机上任意文件的符号链接,导致库尝试读取主机上的任意文件。 + - **效果**:这种行为可能导致以下后果: + - **拒绝服务(DoS)**:由于资源耗尽(例如内存耗尽,触发 OOM kill),Podman、Buildah 或 CRI-O 可能会挂起或崩溃。 + - **潜在信息泄露**:攻击者可能通过符号链接访问主机上的敏感文件,尽管这一具体 CVE 描述中未明确提到数据泄露,但这种风险是存在的。 + +总结:这是一个与容器和用户命名空间相关的漏洞,发生在容器实现程序(Podman、Buildah 和 CRI-O)中,其依赖的 `containers/storage` 库存在符号链接遍历问题,可能导致拒绝服务或潜在的信息泄露。 + +cve: ./data/2025/0xxx/CVE-2025-0113.json +A problem with the network isolation mechanism of the Palo Alto Networks Cortex XDR Broker VM allows attackers unauthorized access to Docker containers from the host network used by Broker VM. This may allow access to read files sent for analysis and logs transmitted by the Cortex XDR Agent to the Cortex XDR server. + +analysis: 1. 该CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是Palo Alto Networks Cortex XDR Broker VM的漏洞。这是一个与容器实现(Docker)相关的漏洞,问题出在网络隔离机制上。由于网络隔离机制存在缺陷,攻击者可以从Broker VM使用的主机网络未经授权访问Docker容器。这可能导致攻击者访问到由Cortex XDR Agent发送至Cortex XDR服务器的分析文件和日志。 + +总结:该漏洞发生在Palo Alto Networks Cortex XDR Broker VM的网络隔离机制中,导致Docker容器被从主机网络未经授权访问,可能泄露敏感文件和日志。 + +cve: ./data/2025/0xxx/CVE-2025-0937.json +Nomad Community and Nomad Enterprise ("Nomad") event stream configured with a wildcard namespace can bypass the ACL Policy allowing reads on other namespaces. + +analysis: 1. 这个CVE信息与namespace相关,涉及ACL Policy绕过问题,可能导致跨命名空间的未授权访问。 + +2. 
这是HashiCorp Nomad程序的漏洞。 + - 漏洞发生的原因:当Nomad事件流配置为使用通配符命名空间时,ACL策略未能正确限制访问权限,导致攻击者可以通过配置通配符命名空间绕过ACL策略。 + - 效果:攻击者可以未经授权读取其他命名空间的数据,破坏了多租户环境下的隔离性。 + +N/A + +cve: ./data/2025/21xxx/CVE-2025-21634.json +In the Linux kernel, the following vulnerability has been resolved: + +cgroup/cpuset: remove kernfs active break + +A warning was found: + +WARNING: CPU: 10 PID: 3486953 at fs/kernfs/file.c:828 +CPU: 10 PID: 3486953 Comm: rmdir Kdump: loaded Tainted: G +RIP: 0010:kernfs_should_drain_open_files+0x1a1/0x1b0 +RSP: 0018:ffff8881107ef9e0 EFLAGS: 00010202 +RAX: 0000000080000002 RBX: ffff888154738c00 RCX: dffffc0000000000 +RDX: 0000000000000007 RSI: 0000000000000004 RDI: ffff888154738c04 +RBP: ffff888154738c04 R08: ffffffffaf27fa15 R09: ffffed102a8e7180 +R10: ffff888154738c07 R11: 0000000000000000 R12: ffff888154738c08 +R13: ffff888750f8c000 R14: ffff888750f8c0e8 R15: ffff888154738ca0 +FS: 00007f84cd0be740(0000) GS:ffff8887ddc00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000555f9fbe00c8 CR3: 0000000153eec001 CR4: 0000000000370ee0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + kernfs_drain+0x15e/0x2f0 + __kernfs_remove+0x165/0x300 + kernfs_remove_by_name_ns+0x7b/0xc0 + cgroup_rm_file+0x154/0x1c0 + cgroup_addrm_files+0x1c2/0x1f0 + css_clear_dir+0x77/0x110 + kill_css+0x4c/0x1b0 + cgroup_destroy_locked+0x194/0x380 + cgroup_rmdir+0x2a/0x140 + +It can be explained by: +rmdir echo 1 > cpuset.cpus + kernfs_fop_write_iter // active=0 +cgroup_rm_file +kernfs_remove_by_name_ns kernfs_get_active // active=1 +__kernfs_remove // active=0x80000002 +kernfs_drain cpuset_write_resmask +wait_event +//waiting (active == 0x80000001) + kernfs_break_active_protection + // active = 0x80000001 +// continue + kernfs_unbreak_active_protection + // active = 0x80000002 +... 
+kernfs_should_drain_open_files +// warning occurs + kernfs_put_active + +This warning is caused by 'kernfs_break_active_protection' when it is +writing to cpuset.cpus, and the cgroup is removed concurrently. + +The commit 3a5a6d0c2b03 ("cpuset: don't nest cgroup_mutex inside +get_online_cpus()") made cpuset_hotplug_workfn asynchronous. This change +involves calling flush_work(), which can create a multi-process +circular locking dependency that involves cgroup_mutex, potentially leading +to a deadlock. To avoid the deadlock, the commit 76bb5ab8f6e3 ("cpuset: break +kernfs active protection in cpuset_write_resmask()") added +'kernfs_break_active_protection' in the cpuset_write_resmask. This could +lead to this warning. + +After the commit 2125c0034c5d ("cgroup/cpuset: Make cpuset hotplug +processing synchronous"), the cpuset_write_resmask no longer needs to +wait the hotplug to finish, which means that concurrent hotplug and cpuset +operations are no longer possible. Therefore, the deadlock doesn't exist +anymore and it does not have to 'break active protection' now. To fix this +warning, just remove kernfs_break_active_protection operation in the +'cpuset_write_resmask'. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与`cgroup`和`cpuset`子系统相关,而`cgroup`是Linux内核中用于实现资源限制、优先级、会计等功能的重要机制,常用于容器技术(如Docker、Kubernetes)中的资源隔离。 + +2. **这是什么程序的漏洞**: + 这是**Linux内核**中的漏洞。具体来说,问题出现在`cgroup`子系统的`cpuset`功能中。漏洞的发生是因为在并发场景下,`kernfs_break_active_protection`被调用时,可能导致警告(WARNING)或潜在的死锁问题。 + + **漏洞如何发生**: + 当对`cpuset.cpus`文件进行写操作时,如果同时移除了对应的`cgroup`,会导致`kernfs_break_active_protection`和`kernfs_unbreak_active_protection`之间的竞争条件,从而触发警告。这种竞争条件的根本原因是`cpuset_hotplug_workfn`被改为异步执行后,引入了潜在的循环锁定依赖。 + + **效果**: + 该漏洞可能导致内核发出警告(WARNING),并可能在极端情况下引发死锁,影响系统的稳定性和可靠性。虽然这本身不会直接导致系统崩溃,但如果被恶意利用,可能会干扰基于`cgroup`的资源隔离机制,进而影响容器化环境的安全性。 + +3. 
**总结**: + - 相关性:与`cgroup`和`cpuset`相关。 + - 程序:Linux内核。 + - 漏洞原因:`kernfs_break_active_protection`在并发场景下的使用不当。 + - 影响:可能导致内核警告或死锁,影响资源隔离机制的稳定性。 + +cve: ./data/2025/21xxx/CVE-2025-21642.json +In the Linux kernel, the following vulnerability has been resolved: + +mptcp: sysctl: sched: avoid using current->nsproxy + +Using the 'net' structure via 'current' is not recommended for different +reasons. + +First, if the goal is to use it to read or write per-netns data, this is +inconsistent with how the "generic" sysctl entries are doing: directly +by only using pointers set to the table entry, e.g. table->data. Linked +to that, the per-netns data should always be obtained from the table +linked to the netns it had been created for, which may not coincide with +the reader's or writer's netns. + +Another reason is that access to current->nsproxy->netns can oops if +attempted when current->nsproxy had been dropped when the current task +is exiting. This is what syzbot found, when using acct(2): + + Oops: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN PTI + KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] + CPU: 1 UID: 0 PID: 5924 Comm: syz-executor Not tainted 6.13.0-rc5-syzkaller-00004-gccb98ccef0e5 #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 + RIP: 0010:proc_scheduler+0xc6/0x3c0 net/mptcp/ctrl.c:125 + Code: 03 42 80 3c 38 00 0f 85 fe 02 00 00 4d 8b a4 24 08 09 00 00 48 b8 00 00 00 00 00 fc ff df 49 8d 7c 24 28 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 cc 02 00 00 4d 8b 7c 24 28 48 8d 84 24 c8 00 00 + RSP: 0018:ffffc900034774e8 EFLAGS: 00010206 + + RAX: dffffc0000000000 RBX: 1ffff9200068ee9e RCX: ffffc90003477620 + RDX: 0000000000000005 RSI: ffffffff8b08f91e RDI: 0000000000000028 + RBP: 0000000000000001 R08: ffffc90003477710 R09: 0000000000000040 + R10: 0000000000000040 R11: 00000000726f7475 R12: 0000000000000000 + R13: ffffc90003477620 R14: 
ffffc90003477710 R15: dffffc0000000000 + FS: 0000000000000000(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fee3cd452d8 CR3: 000000007d116000 CR4: 00000000003526f0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + + proc_sys_call_handler+0x403/0x5d0 fs/proc/proc_sysctl.c:601 + __kernel_write_iter+0x318/0xa80 fs/read_write.c:612 + __kernel_write+0xf6/0x140 fs/read_write.c:632 + do_acct_process+0xcb0/0x14a0 kernel/acct.c:539 + acct_pin_kill+0x2d/0x100 kernel/acct.c:192 + pin_kill+0x194/0x7c0 fs/fs_pin.c:44 + mnt_pin_kill+0x61/0x1e0 fs/fs_pin.c:81 + cleanup_mnt+0x3ac/0x450 fs/namespace.c:1366 + task_work_run+0x14e/0x250 kernel/task_work.c:239 + exit_task_work include/linux/task_work.h:43 [inline] + do_exit+0xad8/0x2d70 kernel/exit.c:938 + do_group_exit+0xd3/0x2a0 kernel/exit.c:1087 + get_signal+0x2576/0x2610 kernel/signal.c:3017 + arch_do_signal_or_restart+0x90/0x7e0 arch/x86/kernel/signal.c:337 + exit_to_user_mode_loop kernel/entry/common.c:111 [inline] + exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] + __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] + syscall_exit_to_user_mode+0x150/0x2a0 kernel/entry/common.c:218 + do_syscall_64+0xda/0x250 arch/x86/entry/common.c:89 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + RIP: 0033:0x7fee3cb87a6a + Code: Unable to access opcode bytes at 0x7fee3cb87a40. 
+ RSP: 002b:00007fffcccac688 EFLAGS: 00000202 ORIG_RAX: 0000000000000037 + RAX: 0000000000000000 RBX: 00007fffcccac710 RCX: 00007fee3cb87a6a + RDX: 0000000000000041 RSI: 0000000000000000 RDI: 0000000000000003 + RBP: 0000000000000003 R08: 00007fffcccac6ac R09: 00007fffcccacac7 + R10: 00007fffcccac710 R11: 0000000000000202 R12: 00007fee3cd49500 + R13: 00007fffcccac6ac R14: 0000000000000000 R15: 00007fee3cd4b000 + + Modules linked in: + ---[ end trace 0000000000000000 ]--- + RIP: 0010:proc_scheduler+0xc6/0x3c0 net/mptcp/ctrl.c:125 + Code: 03 42 80 3c 38 00 0f 85 fe 02 00 00 4d 8b a4 24 08 09 00 00 48 b8 00 00 00 00 00 fc +---truncated--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是的,该漏洞与namespace相关。具体来说,问题涉及`current->nsproxy->netns`的使用,这与网络命名空间(netns)有关。 + +2. **这是什么程序的漏洞:** + - 这是Linux内核(Kernel)中的漏洞。 + - 漏洞发生在多路径TCP(MPTCP)子系统中,具体是在处理sysctl条目时对网络命名空间(netns)的不正确访问导致的问题。 + - **漏洞发生原因:** 在某些情况下(例如任务退出时),`current->nsproxy`已被释放并置空,此时仍直接通过`current->nsproxy->netns`访问网络命名空间,会引发空指针解引用错误(null-ptr-deref)。 + - **效果:** 此漏洞可能导致内核崩溃(Oops),从而使系统不可用。如果攻击者能够触发此漏洞,可能会导致拒绝服务(DoS)攻击。 + +总结:这是一个与网络命名空间相关的Linux内核漏洞,影响MPTCP子系统的sysctl功能,可能导致内核崩溃。 + +cve: ./data/2025/21xxx/CVE-2025-21659.json +In the Linux kernel, the following vulnerability has been resolved: + +netdev: prevent accessing NAPI instances from another namespace + +The NAPI IDs were not fully exposed to user space prior to the netlink +API, so they were never namespaced. The netlink API must ensure that +at the very least NAPI instance belongs to the same netns as the owner +of the genl sock. + +napi_by_id() can become static now, but it needs to move because of +dev_get_by_napi_id(). + +analysis: 1. 该CVE信息与namespace相关。 +2. 
这是Linux内核的漏洞。漏洞发生的原因是NAPI(New API)实例在netlink API中没有正确地限制在所属的网络命名空间(netns)内,导致可能从另一个命名空间访问NAPI实例。这种漏洞的效果可能导致攻击者绕过命名空间隔离,访问或操作不属于当前命名空间的网络资源,从而破坏容器或虚拟化环境的隔离性。 + +cve: ./data/2025/21xxx/CVE-2025-21677.json +In the Linux kernel, the following vulnerability has been resolved: + +pfcp: Destroy device along with udp socket's netns dismantle. + +pfcp_newlink() links the device to a list in dev_net(dev) instead +of net, where a udp tunnel socket is created. + +Even when net is removed, the device stays alive on dev_net(dev). +Then, removing net triggers the splat below. [0] + +In this example, pfcp0 is created in ns2, but the udp socket is +created in ns1. + + ip netns add ns1 + ip netns add ns2 + ip -n ns1 link add netns ns2 name pfcp0 type pfcp + ip netns del ns1 + +Let's link the device to the socket's netns instead. + +Now, pfcp_net_exit() needs another netdev iteration to remove +all pfcp devices in the netns. + +pfcp_dev_list is not used under RCU, so the list API is converted +to the non-RCU variant. + +pfcp_net_exit() can be converted to .exit_batch_rtnl() in net-next. 
+ +[0]: +ref_tracker: net notrefcnt@00000000128b34dc has 1/1 users at + sk_alloc (./include/net/net_namespace.h:345 net/core/sock.c:2236) + inet_create (net/ipv4/af_inet.c:326 net/ipv4/af_inet.c:252) + __sock_create (net/socket.c:1558) + udp_sock_create4 (net/ipv4/udp_tunnel_core.c:18) + pfcp_create_sock (drivers/net/pfcp.c:168) + pfcp_newlink (drivers/net/pfcp.c:182 drivers/net/pfcp.c:197) + rtnl_newlink (net/core/rtnetlink.c:3786 net/core/rtnetlink.c:3897 net/core/rtnetlink.c:4012) + rtnetlink_rcv_msg (net/core/rtnetlink.c:6922) + netlink_rcv_skb (net/netlink/af_netlink.c:2542) + netlink_unicast (net/netlink/af_netlink.c:1321 net/netlink/af_netlink.c:1347) + netlink_sendmsg (net/netlink/af_netlink.c:1891) + ____sys_sendmsg (net/socket.c:711 net/socket.c:726 net/socket.c:2583) + ___sys_sendmsg (net/socket.c:2639) + __sys_sendmsg (net/socket.c:2669) + do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) + entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + +WARNING: CPU: 1 PID: 11 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179) +Modules linked in: +CPU: 1 UID: 0 PID: 11 Comm: kworker/u16:0 Not tainted 6.13.0-rc5-00147-g4c1224501e9d #5 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +Workqueue: netns cleanup_net +RIP: 0010:ref_tracker_dir_exit (lib/ref_tracker.c:179) +Code: 00 00 00 fc ff df 4d 8b 26 49 bd 00 01 00 00 00 00 ad de 4c 39 f5 0f 85 df 00 00 00 48 8b 74 24 08 48 89 df e8 a5 cc 12 02 90 <0f> 0b 90 48 8d 6b 44 be 04 00 00 00 48 89 ef e8 80 de 67 ff 48 89 +RSP: 0018:ff11000007f3fb60 EFLAGS: 00010286 +RAX: 00000000000020ef RBX: ff1100000d6481e0 RCX: 1ffffffff0e40d82 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8423ee3c +RBP: ff1100000d648230 R08: 0000000000000001 R09: fffffbfff0e395af +R10: 0000000000000001 R11: 0000000000000000 R12: ff1100000d648230 +R13: dead000000000100 R14: ff1100000d648230 R15: dffffc0000000000 +FS: 
0000000000000000(0000) GS:ff1100006ce80000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005620e1363990 CR3: 000000000eeb2002 CR4: 0000000000771ef0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 +PKRU: 55555554 +Call Trace: + + ? __warn (kernel/panic.c:748) + ? ref_tracker_dir_exit (lib/ref_tracker.c:179) + ? report_bug (lib/bug.c:201 lib/bug.c:219) + ? handle_bug (arch/x86/kernel/traps.c:285) + ? exc_invalid_op (arch/x86/kernel/traps.c:309 (discriminator 1)) + ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621) + ? _raw_spin_unlock_irqrestore (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194) + ? ref_tracker_dir_exit (lib/ref_tracker.c:179) + ? __pfx_ref_tracker_dir_exit (lib/ref_tracker.c:158) + ? kfree (mm/slub.c:4613 mm/slub.c:4761) + net_free (net/core/net_namespace.c:476 net/core/net_namespace.c:467) + cleanup_net (net/cor +---truncated--- + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,这个 CVE 与 namespace 相关。具体来说,它涉及网络命名空间 (`netns`) 的管理问题,特别是在删除一个网络命名空间时,设备未能正确地与其关联的命名空间一起销毁。 + +2. **这是什么程序的漏洞**: + 这是 Linux 内核 (Kernel) 的漏洞。问题出在 `pfcp_newlink()` 函数中,该函数在创建设备时将设备链接到了错误的命名空间 (`dev_net(dev)` 而不是正确的 `net`)。这导致当一个网络命名空间被移除时,相关的设备仍然存活在另一个命名空间中,从而引发引用计数问题和内核崩溃。 + +3. **漏洞如何发生及效果**: + - **漏洞发生过程**: + 在创建 `pfcp` 设备时,设备被错误地链接到了 `dev_net(dev)` 命名空间,而不是创建它的实际命名空间 (`net`)。因此,当原始命名空间被删除时,设备没有被正确清理,导致引用计数不一致,并最终触发内核警告或崩溃。 + + - **漏洞效果**: + 攻击者可能通过精心构造的网络命名空间操作(如创建和删除命名空间)触发此漏洞,导致系统不稳定或服务中断。虽然此漏洞本身可能不会直接导致权限提升,但它可能被用作更复杂攻击链的一部分,尤其是在容器化环境中,影响隔离性。 + +cve: ./data/2025/21xxx/CVE-2025-21678.json +In the Linux kernel, the following vulnerability has been resolved: + +gtp: Destroy device along with udp socket's netns dismantle. 
+ +gtp_newlink() links the device to a list in dev_net(dev) instead of +src_net, where a udp tunnel socket is created. + +Even when src_net is removed, the device stays alive on dev_net(dev). +Then, removing src_net triggers the splat below. [0] + +In this example, gtp0 is created in ns2, and the udp socket is created +in ns1. + + ip netns add ns1 + ip netns add ns2 + ip -n ns1 link add netns ns2 name gtp0 type gtp role sgsn + ip netns del ns1 + +Let's link the device to the socket's netns instead. + +Now, gtp_net_exit_batch_rtnl() needs another netdev iteration to remove +all gtp devices in the netns. + +[0]: +ref_tracker: net notrefcnt@000000003d6e7d05 has 1/2 users at + sk_alloc (./include/net/net_namespace.h:345 net/core/sock.c:2236) + inet_create (net/ipv4/af_inet.c:326 net/ipv4/af_inet.c:252) + __sock_create (net/socket.c:1558) + udp_sock_create4 (net/ipv4/udp_tunnel_core.c:18) + gtp_create_sock (./include/net/udp_tunnel.h:59 drivers/net/gtp.c:1423) + gtp_create_sockets (drivers/net/gtp.c:1447) + gtp_newlink (drivers/net/gtp.c:1507) + rtnl_newlink (net/core/rtnetlink.c:3786 net/core/rtnetlink.c:3897 net/core/rtnetlink.c:4012) + rtnetlink_rcv_msg (net/core/rtnetlink.c:6922) + netlink_rcv_skb (net/netlink/af_netlink.c:2542) + netlink_unicast (net/netlink/af_netlink.c:1321 net/netlink/af_netlink.c:1347) + netlink_sendmsg (net/netlink/af_netlink.c:1891) + ____sys_sendmsg (net/socket.c:711 net/socket.c:726 net/socket.c:2583) + ___sys_sendmsg (net/socket.c:2639) + __sys_sendmsg (net/socket.c:2669) + do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) + +WARNING: CPU: 1 PID: 60 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179) +Modules linked in: +CPU: 1 UID: 0 PID: 60 Comm: kworker/u16:2 Not tainted 6.13.0-rc5-00147-g4c1224501e9d #5 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +Workqueue: netns cleanup_net +RIP: 0010:ref_tracker_dir_exit 
(lib/ref_tracker.c:179) +Code: 00 00 00 fc ff df 4d 8b 26 49 bd 00 01 00 00 00 00 ad de 4c 39 f5 0f 85 df 00 00 00 48 8b 74 24 08 48 89 df e8 a5 cc 12 02 90 <0f> 0b 90 48 8d 6b 44 be 04 00 00 00 48 89 ef e8 80 de 67 ff 48 89 +RSP: 0018:ff11000009a07b60 EFLAGS: 00010286 +RAX: 0000000000002bd3 RBX: ff1100000f4e1aa0 RCX: 1ffffffff0e40ac6 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8423ee3c +RBP: ff1100000f4e1af0 R08: 0000000000000001 R09: fffffbfff0e395ae +R10: 0000000000000001 R11: 0000000000036001 R12: ff1100000f4e1af0 +R13: dead000000000100 R14: ff1100000f4e1af0 R15: dffffc0000000000 +FS: 0000000000000000(0000) GS:ff1100006ce80000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f9b2464bd98 CR3: 0000000005286005 CR4: 0000000000771ef0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 +PKRU: 55555554 +Call Trace: + + ? __warn (kernel/panic.c:748) + ? ref_tracker_dir_exit (lib/ref_tracker.c:179) + ? report_bug (lib/bug.c:201 lib/bug.c:219) + ? handle_bug (arch/x86/kernel/traps.c:285) + ? exc_invalid_op (arch/x86/kernel/traps.c:309 (discriminator 1)) + ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621) + ? _raw_spin_unlock_irqrestore (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194) + ? ref_tracker_dir_exit (lib/ref_tracker.c:179) + ? __pfx_ref_tracker_dir_exit (lib/ref_tracker.c:158) + ? kfree (mm/slub.c:4613 mm/slub.c:4761) + net_free (net/core/net_namespace.c:476 net/core/net_namespace.c:467) + cleanup_net (net/core/net_namespace.c:664 (discriminator 3)) + process_one_work (kernel/workqueue.c:3229) + worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391 +---truncated--- + +analysis: 1. 
**是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,这个 CVE 与 namespace 相关。具体来说,问题发生在网络命名空间(netns)的处理过程中,当一个网络命名空间被移除时,相关的设备没有正确地销毁,导致引用计数错误和内核崩溃。 + +2. **这是什么程序的漏洞:** + 这是 Linux 内核(Kernel)中的漏洞。问题出现在 `gtp_newlink()` 函数中,该函数在创建 GTP 设备时将设备链接到了错误的命名空间(`dev_net(dev)` 而不是 `src_net`)。当源命名空间(`src_net`)被删除时,设备仍然存活在另一个命名空间中,从而引发引用计数问题和内核崩溃。 + +3. **漏洞如何发生及效果:** + - **漏洞发生过程:** + 在创建 GTP 设备时,设备被错误地链接到了 `dev_net(dev)` 命名空间,而不是创建 UDP 隧道套接字的原始命名空间(`src_net`)。当 `src_net` 被删除时,GTP 设备仍然存在,并且尝试访问已经被释放的资源,导致内核崩溃。 + - **效果:** + 攻击者可以通过特定的网络命名空间操作触发此漏洞,导致系统崩溃(拒绝服务攻击,DoS)。此外,如果恶意用户能够利用此漏洞进一步控制内核行为,则可能造成更严重的后果,例如权限提升。 + +cve: ./data/2025/21xxx/CVE-2025-21745.json +In the Linux kernel, the following vulnerability has been resolved: + +blk-cgroup: Fix class @block_class's subsystem refcount leakage + +blkcg_fill_root_iostats() iterates over @block_class's devices by +class_dev_iter_(init|next)(), but does not end iterating with +class_dev_iter_exit(), so causes the class's subsystem refcount leakage. + +Fix by ending the iterating with class_dev_iter_exit(). + +analysis: 1. 这个CVE信息与namespace、cgroup、container或者容器、隔离相关。 +2. 这是Linux内核的漏洞。该漏洞发生在块设备控制组(blk-cgroup)模块中,由于在`blkcg_fill_root_iostats()`函数中使用`class_dev_iter_init/next`遍历`@block_class`的设备时,未正确调用`class_dev_iter_exit()`结束迭代,导致`class`的子系统引用计数泄漏。此漏洞可能导致资源泄漏,进而影响系统的稳定性和隔离性,尤其是在使用cgroup进行资源限制和隔离的容器环境中。 + +cve: ./data/2025/21xxx/CVE-2025-21771.json +In the Linux kernel, the following vulnerability has been resolved: + +sched_ext: Fix incorrect autogroup migration detection + +scx_move_task() is called from sched_move_task() and tells the BPF scheduler +that cgroup migration is being committed. sched_move_task() is used by both +cgroup and autogroup migrations and scx_move_task() tried to filter out +autogroup migrations by testing the destination cgroup and PF_EXITING but +this is not enough. 
In fact, without explicitly tagging the thread which is +doing the cgroup migration, there is no good way to tell apart +scx_move_task() invocations for racing migration to the root cgroup and an +autogroup migration. + +This led to scx_move_task() incorrectly ignoring a migration from non-root +cgroup to an autogroup of the root cgroup triggering the following warning: + + WARNING: CPU: 7 PID: 1 at kernel/sched/ext.c:3725 scx_cgroup_can_attach+0x196/0x340 + ... + Call Trace: + + cgroup_migrate_execute+0x5b1/0x700 + cgroup_attach_task+0x296/0x400 + __cgroup_procs_write+0x128/0x140 + cgroup_procs_write+0x17/0x30 + kernfs_fop_write_iter+0x141/0x1f0 + vfs_write+0x31d/0x4a0 + __x64_sys_write+0x72/0xf0 + do_syscall_64+0x82/0x160 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Fix it by adding an argument to sched_move_task() that indicates whether the +moving is for a cgroup or autogroup migration. After the change, +scx_move_task() is called only for cgroup migrations and renamed to +scx_cgroup_move_task(). + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个CVE与cgroup相关。具体来说,问题涉及cgroup迁移和autogroup迁移的检测逻辑。 + +2. 
**漏洞所属程序及影响分析**: + - **程序**:这是Linux内核(Kernel)中的漏洞。 + - **漏洞发生原因**:在调度器扩展(sched_ext)模块中,`scx_move_task()`函数用于通知BPF调度器关于cgroup迁移的提交。然而,该函数尝试通过检查目标cgroup和`PF_EXITING`标志来过滤掉autogroup迁移,但这种方法并不足够准确。由于没有明确标记执行cgroup迁移的线程,导致无法区分"与之竞态(racing)发生的、迁移到根cgroup的cgroup迁移"和"autogroup迁移"。 + - **效果**:这种不准确的检测会导致`scx_move_task()`错误地忽略从非根cgroup到根cgroup下某个autogroup的迁移,从而触发内核警告(如`WARNING: CPU: 7 PID: 1 at kernel/sched/ext.c:3725`)。这可能会导致调度器行为异常或潜在的性能问题。 + - **修复方法**:通过向`sched_move_task()`添加一个参数,明确指示迁移是针对cgroup还是autogroup,并将`scx_move_task()`重命名为`scx_cgroup_move_task()`以专注于cgroup迁移。 + +cve: ./data/2025/21xxx/CVE-2025-21806.json +In the Linux kernel, the following vulnerability has been resolved: + +net: let net.core.dev_weight always be non-zero + +The following problem was encountered during stability test: + +(NULL net_device): NAPI poll function process_backlog+0x0/0x530 \ + returned 1, exceeding its budget of 0. +------------[ cut here ]------------ +list_add double add: new=ffff88905f746f48, prev=ffff88905f746f48, \ + next=ffff88905f746e40. +WARNING: CPU: 18 PID: 5462 at lib/list_debug.c:35 \ + __list_add_valid_or_report+0xf3/0x130 +CPU: 18 UID: 0 PID: 5462 Comm: ping Kdump: loaded Not tainted 6.13.0-rc7+ +RIP: 0010:__list_add_valid_or_report+0xf3/0x130 +Call Trace: +? __warn+0xcd/0x250 +?
__list_add_valid_or_report+0xf3/0x130 +enqueue_to_backlog+0x923/0x1070 +netif_rx_internal+0x92/0x2b0 +__netif_rx+0x15/0x170 +loopback_xmit+0x2ef/0x450 +dev_hard_start_xmit+0x103/0x490 +__dev_queue_xmit+0xeac/0x1950 +ip_finish_output2+0x6cc/0x1620 +ip_output+0x161/0x270 +ip_push_pending_frames+0x155/0x1a0 +raw_sendmsg+0xe13/0x1550 +__sys_sendto+0x3bf/0x4e0 +__x64_sys_sendto+0xdc/0x1b0 +do_syscall_64+0x5b/0x170 +entry_SYSCALL_64_after_hwframe+0x76/0x7e + +The reproduction command is as follows: + sysctl -w net.core.dev_weight=0 + ping 127.0.0.1 + +This is because when the napi's weight is set to 0, process_backlog() may +return 0 and clear the NAPI_STATE_SCHED bit of napi->state, causing this +napi to be re-polled in net_rx_action() until __do_softirq() times out. +Since the NAPI_STATE_SCHED bit has been cleared, napi_schedule_rps() can +be retriggered in enqueue_to_backlog(), causing this issue. + +Making the napi's weight always non-zero solves this problem. + +Triggering this issue requires system-wide admin (setting is +not namespaced). + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关:** + 是。虽然该漏洞本身是关于Linux内核网络子系统的,但触发此问题需要系统范围的管理员权限,并且明确提到设置 `net.core.dev_weight` 不是 namespaced 的,这意味着它影响整个系统而不是特定的 namespace 或 cgroup。因此,它与 namespace 和隔离机制间接相关。 + +2. 
**程序漏洞分析:** + - **程序:** Linux 内核 (Kernel) + - **漏洞原因:** 该漏洞发生在 Linux 内核的网络子系统中,当 NAPI(New API)的权重 (`net.core.dev_weight`) 被设置为 0 时,`process_backlog()` 函数可能会返回 0 并清除 `NAPI_STATE_SCHED` 标志位。这会导致在 `net_rx_action()` 中反复轮询此 NAPI,直到 `__do_softirq()` 超时。同时,由于标志位被清除,`napi_schedule_rps()` 可能在 `enqueue_to_backlog()` 中被重新触发,从而引发问题。 + - **效果:** 触发此问题会导致内核警告(kernel warning),可能影响系统的稳定性和性能。尽管该问题不会直接导致系统崩溃,但它可能导致软中断处理效率低下或资源浪费。 + - **触发条件:** 需要系统管理员权限来设置 `net.core.dev_weight=0`,并且执行特定的网络操作(如 `ping 127.0.0.1`)。 + +总结:这是一个 Linux 内核的网络子系统漏洞,与 namespace 的隔离性间接相关,因为其配置不是 namespaced 的,影响整个系统。 + +cve: ./data/2025/21xxx/CVE-2025-21834.json +In the Linux kernel, the following vulnerability has been resolved: + +seccomp: passthrough uretprobe systemcall without filtering + +When attaching uretprobes to processes running inside docker, the attached +process is segfaulted when encountering the retprobe. + +The reason is that now that uretprobe is a system call the default seccomp +filters in docker block it as they only allow a specific set of known +syscalls. This is true for other userspace applications which use seccomp +to control their syscall surface. + +Since uretprobe is a "kernel implementation detail" system call which is +not used by userspace application code directly, it is impractical and +there's very little point in forcing all userspace applications to +explicitly allow it in order to avoid crashing tracked processes. + +Pass this systemcall through seccomp without depending on configuration. + +Note: uretprobe is currently only x86_64 and isn't expected to ever be +supported in i386. + +[kees: minimized changes for easier backporting, tweaked commit log] + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关** + 是的,该 CVE 信息与容器和隔离相关。具体来说,问题出现在使用 Docker 容器时,当尝试将 uretprobes 附加到运行在容器内的进程时,由于 seccomp 默认过滤规则阻止了 uretprobe 系统调用,导致目标进程崩溃(segfault)。 + +2. 
**程序漏洞分析** + - **这是什么程序的漏洞**:这是一个 Linux 内核(Kernel)的漏洞。 + - **漏洞如何发生**:uretprobe 被实现为一个系统调用,而默认情况下,Docker 使用 seccomp 配置来限制容器内允许的系统调用集合。由于 uretprobe 不在默认允许的系统调用列表中,因此当容器内的进程触发 uretprobe 时,seccomp 会拦截并阻止该调用,从而导致进程崩溃。 + - **漏洞效果**:此问题会导致在容器内使用 uretprobes 的调试工具或性能监控工具无法正常工作,且可能导致被监控的进程崩溃。这不仅影响调试能力,还可能引发服务中断或不稳定。 + +总结:这是一个 Linux 内核中的漏洞,与容器和隔离机制密切相关,主要影响 Docker 容器中 uretprobe 的正常使用。 + +cve: ./data/2025/21xxx/CVE-2025-21850.json +In the Linux kernel, the following vulnerability has been resolved: + +nvmet: Fix crash when a namespace is disabled + +The namespace percpu counter protects pending I/O, and we can +only safely diable the namespace once the counter drop to zero. +Otherwise we end up with a crash when running blktests/nvme/058 +(eg for loop transport): + +[ 2352.930426] [ T53909] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN PTI +[ 2352.930431] [ T53909] KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] +[ 2352.930434] [ T53909] CPU: 3 UID: 0 PID: 53909 Comm: kworker/u16:5 Tainted: G W 6.13.0-rc6 #232 +[ 2352.930438] [ T53909] Tainted: [W]=WARN +[ 2352.930440] [ T53909] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 +[ 2352.930443] [ T53909] Workqueue: nvmet-wq nvme_loop_execute_work [nvme_loop] +[ 2352.930449] [ T53909] RIP: 0010:blkcg_set_ioprio+0x44/0x180 + +as the queue is already torn down when calling submit_bio(); + +So we need to init the percpu counter in nvmet_ns_enable(), and +wait for it to drop to zero in nvmet_ns_disable() to avoid having +I/O pending after the namespace has been disabled. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 该 CVE 提到的是 Linux 内核中 `nvmet` 子系统的一个问题,涉及 namespace 的 percpu 计数器。这里的 "namespace" 是指 NVMe 子系统的命名空间(与块设备相关),而不是 Linux 命名空间(如 mount、PID、network 等),因此严格来说,它与容器或隔离机制无关。 + +2.
**漏洞信息分析**: + - **程序类型**:这是 Linux 内核的漏洞,具体发生在 `nvmet` 子系统中。 + - **漏洞发生原因**:在禁用 NVMe 命名空间时,没有正确等待 percpu 计数器降为零,导致可能存在未完成的 I/O 操作。当这些操作尝试访问已经被销毁的队列时,触发了 general protection fault(通常表现为内核崩溃)。 + - **漏洞效果**:此漏洞会导致系统在特定条件下(例如运行 blktests/nvme/058 测试)出现内核 oops 或崩溃,影响系统的稳定性。 + +**结论**:N/A + +cve: ./data/2025/21xxx/CVE-2025-21860.json +In the Linux kernel, the following vulnerability has been resolved: + +mm/zswap: fix inconsistency when zswap_store_page() fails + +Commit b7c0ccdfbafd ("mm: zswap: support large folios in zswap_store()") +skips charging any zswap entries when it failed to zswap the entire folio. + +However, when some base pages are zswapped but it failed to zswap the +entire folio, the zswap operation is rolled back. When freeing zswap +entries for those pages, zswap_entry_free() uncharges the zswap entries +that were not previously charged, causing zswap charging to become +inconsistent. + +This inconsistency triggers two warnings with following steps: + # On a machine with 64GiB of RAM and 36GiB of zswap + $ stress-ng --bigheap 2 # wait until the OOM-killer kills stress-ng + $ sudo reboot + + The two warnings are: + in mm/memcontrol.c:163, function obj_cgroup_release(): + WARN_ON_ONCE(nr_bytes & (PAGE_SIZE - 1)); + + in mm/page_counter.c:60, function page_counter_cancel(): + if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n", + new, nr_pages)) + +zswap_stored_pages also becomes inconsistent in the same way. + +As suggested by Kanchana, increment zswap_stored_pages and charge zswap +entries within zswap_store_page() when it succeeds. This way, +zswap_entry_free() will decrement the counter and uncharge the entries +when it failed to zswap the entire folio. + +While this could potentially be optimized by batching objcg charging and +incrementing the counter, let's focus on fixing the bug this time and +leave the optimization for later after some evaluation. + +After resolving the inconsistency, the warnings disappear. 
+ +[42.hyeyoo@gmail.com: refactor zswap_store_page()] + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 该CVE描述中提到`zswap`和内存管理相关的代码路径,包括`obj_cgroup_release()`和`page_counter_cancel()`函数。这些函数与cgroup(控制组)的内存控制器有关,因此可以认为该漏洞与cgroup相关。但并未直接涉及namespace、container或隔离机制。 + +2. **程序漏洞分析**: + - **程序**:这是Linux内核(Kernel)中的一个漏洞。 + - **漏洞发生原因**:在`zswap_store_page()`函数中,当部分基础页面(base pages)被成功压缩存储到zswap中,但由于某些原因未能完成整个大页(folio)的压缩时,内核会回滚zswap操作。然而,在释放这些部分页面对应的zswap条目时,`zswap_entry_free()`函数错误地取消了未正确计费的zswap条目,导致zswap计数器和内存控制器之间的状态不一致。 + - **漏洞效果**:这种不一致性会触发两个警告: + 1. 在`obj_cgroup_release()`中触发`WARN_ON_ONCE`,提示字节数存在对齐问题。 + 2. 在`page_counter_cancel()`中触发`WARN_ONCE`,提示页面计数器出现负值(即下溢)。 + 此外,`zswap_stored_pages`计数器也会变得不一致,可能导致内存管理逻辑进一步混乱。 + +**结论**:该CVE与cgroup相关,属于Linux内核的内存管理子系统漏洞,影响内存控制器的正确性,但不直接涉及namespace、container或隔离机制。 + +cve: ./data/2025/21xxx/CVE-2025-21861.json +In the Linux kernel, the following vulnerability has been resolved: + +mm/migrate_device: don't add folio to be freed to LRU in migrate_device_finalize() + +If migration succeeded, we called +folio_migrate_flags()->mem_cgroup_migrate() to migrate the memcg from the +old to the new folio. This will set memcg_data of the old folio to 0. + +Similarly, if migration failed, memcg_data of the dst folio is left unset. + +If we call folio_putback_lru() on such folios (memcg_data == 0), we will +add the folio to be freed to the LRU, making memcg code unhappy. Running +the hmm selftests: + + # ./hmm-tests + ... + # RUN hmm.hmm_device_private.migrate ... 
+ [ 102.078007][T14893] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x7ff27d200 pfn:0x13cc00 + [ 102.079974][T14893] anon flags: 0x17ff00000020018(uptodate|dirty|swapbacked|node=0|zone=2|lastcpupid=0x7ff) + [ 102.082037][T14893] raw: 017ff00000020018 dead000000000100 dead000000000122 ffff8881353896c9 + [ 102.083687][T14893] raw: 00000007ff27d200 0000000000000000 00000001ffffffff 0000000000000000 + [ 102.085331][T14893] page dumped because: VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled()) + [ 102.087230][T14893] ------------[ cut here ]------------ + [ 102.088279][T14893] WARNING: CPU: 0 PID: 14893 at ./include/linux/memcontrol.h:726 folio_lruvec_lock_irqsave+0x10e/0x170 + [ 102.090478][T14893] Modules linked in: + [ 102.091244][T14893] CPU: 0 UID: 0 PID: 14893 Comm: hmm-tests Not tainted 6.13.0-09623-g6c216bc522fd #151 + [ 102.093089][T14893] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 + [ 102.094848][T14893] RIP: 0010:folio_lruvec_lock_irqsave+0x10e/0x170 + [ 102.096104][T14893] Code: ... + [ 102.099908][T14893] RSP: 0018:ffffc900236c37b0 EFLAGS: 00010293 + [ 102.101152][T14893] RAX: 0000000000000000 RBX: ffffea0004f30000 RCX: ffffffff8183f426 + [ 102.102684][T14893] RDX: ffff8881063cb880 RSI: ffffffff81b8117f RDI: ffff8881063cb880 + [ 102.104227][T14893] RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000000 + [ 102.105757][T14893] R10: 0000000000000001 R11: 0000000000000002 R12: ffffc900236c37d8 + [ 102.107296][T14893] R13: ffff888277a2bcb0 R14: 000000000000001f R15: 0000000000000000 + [ 102.108830][T14893] FS: 00007ff27dbdd740(0000) GS:ffff888277a00000(0000) knlGS:0000000000000000 + [ 102.110643][T14893] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [ 102.111924][T14893] CR2: 00007ff27d400000 CR3: 000000010866e000 CR4: 0000000000750ef0 + [ 102.113478][T14893] PKRU: 55555554 + [ 102.114172][T14893] Call Trace: + [ 102.114805][T14893] + [ 102.115397][T14893] ? 
folio_lruvec_lock_irqsave+0x10e/0x170 + [ 102.116547][T14893] ? __warn.cold+0x110/0x210 + [ 102.117461][T14893] ? folio_lruvec_lock_irqsave+0x10e/0x170 + [ 102.118667][T14893] ? report_bug+0x1b9/0x320 + [ 102.119571][T14893] ? handle_bug+0x54/0x90 + [ 102.120494][T14893] ? exc_invalid_op+0x17/0x50 + [ 102.121433][T14893] ? asm_exc_invalid_op+0x1a/0x20 + [ 102.122435][T14893] ? __wake_up_klogd.part.0+0x76/0xd0 + [ 102.123506][T14893] ? dump_page+0x4f/0x60 + [ 102.124352][T14893] ? folio_lruvec_lock_irqsave+0x10e/0x170 + [ 102.125500][T14893] folio_batch_move_lru+0xd4/0x200 + [ 102.126577][T14893] ? __pfx_lru_add+0x10/0x10 + [ 102.127505][T14893] __folio_batch_add_and_move+0x391/0x720 + [ 102.128633][T14893] ? __pfx_lru_add+0x10/0x10 + [ 102.129550][T14893] folio_putback_lru+0x16/0x80 + [ 102.130564][T14893] migrate_device_finalize+0x9b/0x530 + [ 102.131640][T14893] dmirror_migrate_to_device.constprop.0+0x7c5/0xad0 + [ 102.133047][T14893] dmirror_fops_unlocked_ioctl+0x89b/0xc80 + +Likely, nothing else goes wrong: putting the last folio reference will +remove the folio from the LRU again. So besides memcg complaining, adding +the folio to be freed to the LRU is just an unnecessary step. + +The new flow resembles what we have in migrate_folio_move(): add the dst +to the lru, rem +---truncated--- + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与cgroup(控制组)相关。具体来说,问题涉及到内存控制组(memcg)在页迁移过程中的处理逻辑。 + +2. 
**这是什么程序的漏洞**: + - 这是**Linux内核(Kernel)**的漏洞。 + - 漏洞发生在内存管理子系统中,特别是在页迁移(page migration)过程中对内存控制组(memcg)的处理。 + - **漏洞发生原因**:当页迁移成功或失败时,`memcg_data`字段可能被错误地设置为0,导致在调用`folio_putback_lru()`时将需要释放的页错误地添加到LRU列表中。这违反了memcg的预期行为,可能导致内存资源分配混乱。 + - **效果**:虽然最终页会被正确释放,但这种不必要的操作会导致memcg代码发出警告(WARNING),影响系统的稳定性和性能。 + +总结:该CVE与cgroup相关,是Linux内核内存管理子系统中的一个漏洞,涉及页迁移和memcg的交互问题。 + +cve: ./data/2025/21xxx/CVE-2025-21884.json +In the Linux kernel, the following vulnerability has been resolved: + +net: better track kernel sockets lifetime + +While kernel sockets are dismantled during pernet_operations->exit(), +their freeing can be delayed by any tx packets still held in qdisc +or device queues, due to skb_set_owner_w() prior calls. + +This then trigger the following warning from ref_tracker_dir_exit() [1] + +To fix this, make sure that kernel sockets own a reference on net->passive. + +Add sk_net_refcnt_upgrade() helper, used whenever a kernel socket +is converted to a refcounted one. + +[1] + +[ 136.263918][ T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at +[ 136.263918][ T35] sk_alloc+0x2b3/0x370 +[ 136.263918][ T35] inet6_create+0x6ce/0x10f0 +[ 136.263918][ T35] __sock_create+0x4c0/0xa30 +[ 136.263918][ T35] inet_ctl_sock_create+0xc2/0x250 +[ 136.263918][ T35] igmp6_net_init+0x39/0x390 +[ 136.263918][ T35] ops_init+0x31e/0x590 +[ 136.263918][ T35] setup_net+0x287/0x9e0 +[ 136.263918][ T35] copy_net_ns+0x33f/0x570 +[ 136.263918][ T35] create_new_namespaces+0x425/0x7b0 +[ 136.263918][ T35] unshare_nsproxy_namespaces+0x124/0x180 +[ 136.263918][ T35] ksys_unshare+0x57d/0xa70 +[ 136.263918][ T35] __x64_sys_unshare+0x38/0x40 +[ 136.263918][ T35] do_syscall_64+0xf3/0x230 +[ 136.263918][ T35] entry_SYSCALL_64_after_hwframe+0x77/0x7f +[ 136.263918][ T35] +[ 136.343488][ T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at +[ 136.343488][ T35] sk_alloc+0x2b3/0x370 +[ 136.343488][ T35] inet6_create+0x6ce/0x10f0 +[ 136.343488][ T35] __sock_create+0x4c0/0xa30 +[ 
136.343488][ T35] inet_ctl_sock_create+0xc2/0x250 +[ 136.343488][ T35] ndisc_net_init+0xa7/0x2b0 +[ 136.343488][ T35] ops_init+0x31e/0x590 +[ 136.343488][ T35] setup_net+0x287/0x9e0 +[ 136.343488][ T35] copy_net_ns+0x33f/0x570 +[ 136.343488][ T35] create_new_namespaces+0x425/0x7b0 +[ 136.343488][ T35] unshare_nsproxy_namespaces+0x124/0x180 +[ 136.343488][ T35] ksys_unshare+0x57d/0xa70 +[ 136.343488][ T35] __x64_sys_unshare+0x38/0x40 +[ 136.343488][ T35] do_syscall_64+0xf3/0x230 +[ 136.343488][ T35] entry_SYSCALL_64_after_hwframe+0x77/0x7f + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**:是的,这个 CVE 信息与 namespace 相关。具体来说,问题出现在 `copy_net_ns` 和 `create_new_namespaces` 函数中,这些函数与网络命名空间(network namespace)的创建和管理有关。 + +2. **这是什么程序的漏洞**:这是 Linux 内核(Kernel)的漏洞。漏洞发生在内核对网络命名空间中 kernel sockets 生命周期的管理上。具体来说,当网络命名空间被销毁时,kernel sockets 的释放可能会因为队列中仍然存在的数据包而延迟,导致引用计数不一致的问题。 + +3. **漏洞如何发生及效果**: + - 漏洞发生的原因是 kernel sockets 在网络命名空间销毁时未能正确处理其引用计数,导致引用计数警告(ref_tracker_dir_exit() 警告)。 + - 效果是可能导致内核日志中出现警告信息,并且在极端情况下可能引发内核崩溃或资源泄漏。虽然该问题本身不一定直接导致安全风险,但它可能间接影响系统的稳定性,尤其是在涉及频繁创建和销毁网络命名空间的场景下(例如容器环境中)。 + +cve: ./data/2025/21xxx/CVE-2025-21913.json +In the Linux kernel, the following vulnerability has been resolved: + +x86/amd_nb: Use rdmsr_safe() in amd_get_mmconfig_range() + +Xen doesn't offer MSR_FAM10H_MMIO_CONF_BASE to all guests. 
This results +in the following warning: + + unchecked MSR access error: RDMSR from 0xc0010058 at rIP: 0xffffffff8101d19f (xen_do_read_msr+0x7f/0xa0) + Call Trace: + xen_read_msr+0x1e/0x30 + amd_get_mmconfig_range+0x2b/0x80 + quirk_amd_mmconfig_area+0x28/0x100 + pnp_fixup_device+0x39/0x50 + __pnp_add_device+0xf/0x150 + pnp_add_device+0x3d/0x100 + pnpacpi_add_device_handler+0x1f9/0x280 + acpi_ns_get_device_callback+0x104/0x1c0 + acpi_ns_walk_namespace+0x1d0/0x260 + acpi_get_devices+0x8a/0xb0 + pnpacpi_init+0x50/0x80 + do_one_initcall+0x46/0x2e0 + kernel_init_freeable+0x1da/0x2f0 + kernel_init+0x16/0x1b0 + ret_from_fork+0x30/0x50 + ret_from_fork_asm+0x1b/0x30 + +based on quirks for a "PNP0c01" device. Treating MMCFG as disabled is the +right course of action, so no change is needed there. + +This was most likely exposed by fixing the Xen MSR accessors to not be +silently-safe. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. **漏洞相关信息**: + - 这是**Linux内核**的漏洞,具体发生在x86架构下的AMD NB(Northbridge)相关的代码中。 + - 漏洞原因:在`amd_get_mmconfig_range()`函数中使用了不安全的MSR(Model-Specific Register)读取操作。Xen虚拟化环境中,并未为所有虚拟机暴露`MSR_FAM10H_MMIO_CONF_BASE`寄存器,这导致尝试读取该寄存器时触发警告。 + - 漏洞效果:虽然不会直接导致系统崩溃或安全问题,但会生成警告信息,可能影响系统的稳定性和可维护性。 + - 修复方法:通过使用`rdmsr_safe()`替代原始的MSR读取操作,确保在无法访问特定寄存器时能够安全处理。 + +3. **结论**:此CVE与namespace、cgroup、container或容器隔离无关,因此无需进一步分析其对容器技术的影响。 + +cve: ./data/2025/21xxx/CVE-2025-21975.json +In the Linux kernel, the following vulnerability has been resolved: + +net/mlx5: handle errors in mlx5_chains_create_table() + +In mlx5_chains_create_table(), the return value of mlx5_get_fdb_sub_ns() +and mlx5_get_flow_namespace() must be checked to prevent NULL pointer +dereferences. If either function fails, the function should log error +message with mlx5_core_warn() and return error pointer. + +analysis: 1. 该CVE信息与namespace相关,因为涉及到`mlx5_get_fdb_sub_ns()`和`mlx5_get_flow_namespace()`函数。 +2. 
这是Linux内核的漏洞。漏洞发生在`mlx5_chains_create_table()`函数中,当`mlx5_get_fdb_sub_ns()`或`mlx5_get_flow_namespace()`返回失败时,没有正确检查返回值,可能导致空指针解引用。攻击者可能利用此漏洞导致系统崩溃(拒绝服务攻击)或潜在的权限提升。 + +结论:该CVE与namespace相关,属于Linux内核的漏洞。 + +cve: ./data/2025/21xxx/CVE-2025-21983.json +In the Linux kernel, the following vulnerability has been resolved: + +mm/slab/kvfree_rcu: Switch to WQ_MEM_RECLAIM wq + +Currently kvfree_rcu() APIs use a system workqueue which is +"system_unbound_wq" to driver RCU machinery to reclaim a memory. + +Recently, it has been noted that the following kernel warning can +be observed: + + +workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work + WARNING: CPU: 21 PID: 330 at kernel/workqueue.c:3719 check_flush_dependency+0x112/0x120 + Modules linked in: intel_uncore_frequency(E) intel_uncore_frequency_common(E) skx_edac(E) ... + CPU: 21 UID: 0 PID: 330 Comm: kworker/u144:6 Tainted: G E 6.13.2-0_g925d379822da #1 + Hardware name: Wiwynn Twin Lakes MP/Twin Lakes Passive MP, BIOS YMM20 02/01/2023 + Workqueue: nvme-wq nvme_scan_work + RIP: 0010:check_flush_dependency+0x112/0x120 + Code: 05 9a 40 14 02 01 48 81 c6 c0 00 00 00 48 8b 50 18 48 81 c7 c0 00 00 00 48 89 f9 48 ... + RSP: 0018:ffffc90000df7bd8 EFLAGS: 00010082 + RAX: 000000000000006a RBX: ffffffff81622390 RCX: 0000000000000027 + RDX: 00000000fffeffff RSI: 000000000057ffa8 RDI: ffff88907f960c88 + RBP: 0000000000000000 R08: ffffffff83068e50 R09: 000000000002fffd + R10: 0000000000000004 R11: 0000000000000000 R12: ffff8881001a4400 + R13: 0000000000000000 R14: ffff88907f420fb8 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff88907f940000(0000) knlGS:0000000000000000 + CR2: 00007f60c3001000 CR3: 000000107d010005 CR4: 00000000007726f0 + PKRU: 55555554 + Call Trace: + + ? __warn+0xa4/0x140 + ? check_flush_dependency+0x112/0x120 + ? report_bug+0xe1/0x140 + ? check_flush_dependency+0x112/0x120 + ? handle_bug+0x5e/0x90 + ? exc_invalid_op+0x16/0x40 + ? 
asm_exc_invalid_op+0x16/0x20 + ? timer_recalc_next_expiry+0x190/0x190 + ? check_flush_dependency+0x112/0x120 + ? check_flush_dependency+0x112/0x120 + __flush_work.llvm.1643880146586177030+0x174/0x2c0 + flush_rcu_work+0x28/0x30 + kvfree_rcu_barrier+0x12f/0x160 + kmem_cache_destroy+0x18/0x120 + bioset_exit+0x10c/0x150 + disk_release.llvm.6740012984264378178+0x61/0xd0 + device_release+0x4f/0x90 + kobject_put+0x95/0x180 + nvme_put_ns+0x23/0xc0 + nvme_remove_invalid_namespaces+0xb3/0xd0 + nvme_scan_work+0x342/0x490 + process_scheduled_works+0x1a2/0x370 + worker_thread+0x2ff/0x390 + ? pwq_release_workfn+0x1e0/0x1e0 + kthread+0xb1/0xe0 + ? __kthread_parkme+0x70/0x70 + ret_from_fork+0x30/0x40 + ? __kthread_parkme+0x70/0x70 + ret_from_fork_asm+0x11/0x20 + + ---[ end trace 0000000000000000 ]--- + + +To address this switch to use of independent WQ_MEM_RECLAIM +workqueue, so the rules are not violated from workqueue framework +point of view. + +Apart of that, since kvfree_rcu() does reclaim memory it is worth +to go with WQ_MEM_RECLAIM type of wq because it is designed for +this purpose. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. **这是什么程序的漏洞,如何发生,有何效果**: + 这是Linux内核中的一个漏洞。问题出在`kvfree_rcu()`函数中,它使用了一个名为`system_unbound_wq`的系统工作队列来驱动RCU(Read-Copy-Update)机制以回收内存。然而,这种使用方式违反了工作队列框架的规则,导致内核发出警告(kernel warning)。具体来说,带有`WQ_MEM_RECLAIM`标志的工作队列(此处为`nvme-wq`上的`nvme_scan_work`)在执行过程中去刷新(flush)排在不带该标志的`events_unbound`队列上的`kfree_rcu_work`,从而触发了`check_flush_dependency()`的依赖检查警告。 + + 修复方法是将`kvfree_rcu()`切换到使用独立的`WQ_MEM_RECLAIM`类型的工作队列,以符合工作队列框架的设计规则。这样可以避免潜在的死锁或其他内存管理问题。 + + 漏洞效果主要是可能导致内核不稳定,例如触发警告或更严重的系统崩溃,尤其是在内存压力较大的情况下。 + +3. **N/A** + +cve: ./data/2025/22xxx/CVE-2025-22077.json +In the Linux kernel, the following vulnerability has been resolved: + +Revert "smb: client: fix TCP timers deadlock after rmmod" + +This reverts commit e9f2517a3e18a54a3943c098d2226b245d488801.
+ +Commit e9f2517a3e18 ("smb: client: fix TCP timers deadlock after +rmmod") is intended to fix a null-ptr-deref in LOCKDEP, which is +mentioned as CVE-2024-54680, but is actually did not fix anything; +The issue can be reproduced on top of it. [0] + +Also, it reverted the change by commit ef7134c7fc48 ("smb: client: +Fix use-after-free of network namespace.") and introduced a real +issue by reviving the kernel TCP socket. + +When a reconnect happens for a CIFS connection, the socket state +transitions to FIN_WAIT_1. Then, inet_csk_clear_xmit_timers_sync() +in tcp_close() stops all timers for the socket. + +If an incoming FIN packet is lost, the socket will stay at FIN_WAIT_1 +forever, and such sockets could be leaked up to net.ipv4.tcp_max_orphans. + +Usually, FIN can be retransmitted by the peer, but if the peer aborts +the connection, the issue comes into reality. + +I warned about this privately by pointing out the exact report [1], +but the bogus fix was finally merged. + +So, we should not stop the timers to finally kill the connection on +our side in that case, meaning we must not use a kernel socket for +TCP whose sk->sk_net_refcnt is 0. + +The kernel socket does not have a reference to its netns to make it +possible to tear down netns without cleaning up every resource in it. + +For example, tunnel devices use a UDP socket internally, but we can +destroy netns without removing such devices and let it complete +during exit. Otherwise, netns would be leaked when the last application +died. + +However, this is problematic for TCP sockets because TCP has timers to +close the connection gracefully even after the socket is close()d. The +lifetime of the socket and its netns is different from the lifetime of +the underlying connection. + +If the socket user does not maintain the netns lifetime, the timer could +be fired after the socket is close()d and its netns is freed up, resulting +in use-after-free. 
+ +Actually, we have seen so many similar issues and converted such sockets +to have a reference to netns. + +That's why I converted the CIFS client socket to have a reference to +netns (sk->sk_net_refcnt == 1), which is somehow mentioned as out-of-scope +of CIFS and technically wrong in e9f2517a3e18, but **is in-scope and the right +fix**. + +Regarding the LOCKDEP issue, we can prevent the module unload by +bumping the module refcount when switching the LOCKDEP key in +sock_lock_init_class_and_name(). [2] + +For a while, let's revert the bogus fix. + +Note that now we can use sk_net_refcnt_upgrade() for the socket +conversion, but I'll do so later separately to make backport easy. + +analysis: 1. **分析这个CVE信息是否与namespace、cgroup、container或者容器、隔离相关** + 是的,该CVE与namespace相关。具体来说,问题涉及网络命名空间(netns)和内核TCP套接字的生命周期管理。由于TCP套接字可能引用网络命名空间,如果处理不当,会导致命名空间泄漏或释放后使用(use-after-free)问题。 + +2. **这是什么程序的漏洞,是内核Kernel,还是容器实现Docker,还是容器内部运行的应用?** + 这是Linux内核(Kernel)的漏洞。漏洞发生在CIFS客户端的TCP套接字处理逻辑中,当网络命名空间被销毁时,未正确管理TCP套接字的生命周期,可能导致资源泄漏或use-after-free问题。 + +3. **漏洞如何发生,它有何效果?** + 漏洞发生在以下场景: + - 当CIFS客户端重新连接时,TCP套接字进入`FIN_WAIT_1`状态。 + - `tcp_close()`中的`inet_csk_clear_xmit_timers_sync()`会停止该套接字的所有计时器;此时如果对端的FIN包丢失且对端中止了连接,套接字将永远停留在`FIN_WAIT_1`状态。 + - 这种情况下,未正确引用网络命名空间的TCP套接字可能会泄漏,直到达到`net.ipv4.tcp_max_orphans`限制。 + - 更严重的是,如果网络命名空间在套接字关闭后被释放,而计时器仍然触发操作,可能导致use-after-free问题,进而引发系统崩溃或潜在的安全风险。 + +总结:这是一个与网络命名空间相关的Linux内核漏洞,主要影响CIFS客户端的TCP套接字管理,可能导致资源泄漏或use-after-free问题。 + +cve: ./data/2025/22xxx/CVE-2025-22089.json +In the Linux kernel, the following vulnerability has been resolved: + +RDMA/core: Don't expose hw_counters outside of init net namespace + +Commit 467f432a521a ("RDMA/core: Split port and device counter sysfs +attributes") accidentally almost exposed hw counters to non-init net +namespaces. 
It didn't expose them fully, as an attempt to read any of +those counters leads to a crash like this one: + +[42021.807566] BUG: kernel NULL pointer dereference, address: 0000000000000028 +[42021.814463] #PF: supervisor read access in kernel mode +[42021.819549] #PF: error_code(0x0000) - not-present page +[42021.824636] PGD 0 P4D 0 +[42021.827145] Oops: 0000 [#1] SMP PTI +[42021.830598] CPU: 82 PID: 2843922 Comm: switchto-defaul Kdump: loaded Tainted: G S W I XXX +[42021.841697] Hardware name: XXX +[42021.849619] RIP: 0010:hw_stat_device_show+0x1e/0x40 [ib_core] +[42021.855362] Code: 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 49 89 d0 4c 8b 5e 20 48 8b 8f b8 04 00 00 48 81 c7 f0 fa ff ff <48> 8b 41 28 48 29 ce 48 83 c6 d0 48 c1 ee 04 69 d6 ab aa aa aa 48 +[42021.873931] RSP: 0018:ffff97fe90f03da0 EFLAGS: 00010287 +[42021.879108] RAX: ffff9406988a8c60 RBX: ffff940e1072d438 RCX: 0000000000000000 +[42021.886169] RDX: ffff94085f1aa000 RSI: ffff93c6cbbdbcb0 RDI: ffff940c7517aef0 +[42021.893230] RBP: ffff97fe90f03e70 R08: ffff94085f1aa000 R09: 0000000000000000 +[42021.900294] R10: ffff94085f1aa000 R11: ffffffffc0775680 R12: ffffffff87ca2530 +[42021.907355] R13: ffff940651602840 R14: ffff93c6cbbdbcb0 R15: ffff94085f1aa000 +[42021.914418] FS: 00007fda1a3b9700(0000) GS:ffff94453fb80000(0000) knlGS:0000000000000000 +[42021.922423] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[42021.928130] CR2: 0000000000000028 CR3: 00000042dcfb8003 CR4: 00000000003726f0 +[42021.935194] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[42021.942257] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[42021.949324] Call Trace: +[42021.951756] +[42021.953842] [] ? show_regs+0x64/0x70 +[42021.959030] [] ? __die+0x78/0xc0 +[42021.963874] [] ? page_fault_oops+0x2b5/0x3b0 +[42021.969749] [] ? exc_page_fault+0x1a2/0x3c0 +[42021.975549] [] ? asm_exc_page_fault+0x26/0x30 +[42021.981517] [] ? 
__pfx_show_hw_stats+0x10/0x10 [ib_core] +[42021.988482] [] ? hw_stat_device_show+0x1e/0x40 [ib_core] +[42021.995438] [] dev_attr_show+0x1e/0x50 +[42022.000803] [] sysfs_kf_seq_show+0x81/0xe0 +[42022.006508] [] seq_read_iter+0xf4/0x410 +[42022.011954] [] vfs_read+0x16e/0x2f0 +[42022.017058] [] ksys_read+0x6e/0xe0 +[42022.022073] [] do_syscall_64+0x6a/0xa0 +[42022.027441] [] entry_SYSCALL_64_after_hwframe+0x78/0xe2 + +The problem can be reproduced using the following steps: + ip netns add foo + ip netns exec foo bash + cat /sys/class/infiniband/mlx4_0/hw_counters/* + +The panic occurs because casting the device pointer into an +ib_device pointer using container_of() in hw_stat_device_show() is +wrong and leads to memory corruption. + +However the real problem is that hw counters should never have been exposed +outside of the init net namespace. + +Fix this by saving the index of the corresponding attribute group +(it might be 1 or 2 depending on the presence of driver-specific +attributes) and zeroing the pointer to hw_counters group for compat +devices during the initialization. + +With this fix applied hw_counters are not available in a non-init +net namespace: + find /sys/class/infiniband/mlx4_0/ -name hw_counters + /sys/class/infiniband/mlx4_0/ports/1/hw_counters + /sys/class/infiniband/mlx4_0/ports/2/hw_counters + /sys/class/infiniband/mlx4_0/hw_counters + + ip netns add foo + ip netns exec foo bash + find /sys/class/infiniband/mlx4_0/ -name hw_counters + +analysis: 1. **分析这个CVE信息是否与namespace、cgroup、container或者容器、隔离相关** + 是的,该漏洞与namespace相关。具体来说,它涉及网络命名空间(net namespace)的隔离问题。hw_counters本应仅在初始网络命名空间(init net namespace)中可见,但由于实现错误,它们可能被错误地暴露到非初始网络命名空间中。 + +2. 
**这是什么程序的漏洞,是内核Kernel,还是容器实现Docker,还是容器内部运行的应用** + - 该漏洞发生在Linux内核(Kernel)中,具体是在RDMA(Remote Direct Memory Access)核心模块的实现部分。 + - 漏洞发生的原因是,在`hw_stat_device_show()`函数中,使用`container_of()`将设备指针转换为`ib_device`指针时出现了错误,导致内存访问崩溃。 + - 效果上,这一问题可能导致系统崩溃(kernel panic),因为尝试读取hw_counters时会触发空指针解引用错误。 + - 此外,虽然hw_counters未完全暴露,但其存在本身违反了网络命名空间的隔离性,可能会被恶意用户利用来绕过隔离机制或进行进一步攻击。 + +3. **总结** + - 该漏洞属于Linux内核中的RDMA模块,与网络命名空间的隔离性相关。 + - 它可能导致系统崩溃,并且违反了命名空间的隔离设计原则。 + +cve: ./data/2025/22xxx/CVE-2025-22105.json +In the Linux kernel, the following vulnerability has been resolved: + +bonding: check xdp prog when set bond mode + +Following operations can trigger a warning[1]: + + ip netns add ns1 + ip netns exec ns1 ip link add bond0 type bond mode balance-rr + ip netns exec ns1 ip link set dev bond0 xdp obj af_xdp_kern.o sec xdp + ip netns exec ns1 ip link set bond0 type bond mode broadcast + ip netns del ns1 + +When delete the namespace, dev_xdp_uninstall() is called to remove xdp +program on bond dev, and bond_xdp_set() will check the bond mode. If bond +mode is changed after attaching xdp program, the warning may occur. + +Some bond modes (broadcast, etc.) do not support native xdp. Set bond mode +with xdp program attached is not good. Add check for xdp program when set +bond mode. + + [1] + ------------[ cut here ]------------ + WARNING: CPU: 0 PID: 11 at net/core/dev.c:9912 unregister_netdevice_many_notify+0x8d9/0x930 + Modules linked in: + CPU: 0 UID: 0 PID: 11 Comm: kworker/u4:0 Not tainted 6.14.0-rc4 #107 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 + Workqueue: netns cleanup_net + RIP: 0010:unregister_netdevice_many_notify+0x8d9/0x930 + Code: 00 00 48 c7 c6 6f e3 a2 82 48 c7 c7 d0 b3 96 82 e8 9c 10 3e ... 
+ RSP: 0018:ffffc90000063d80 EFLAGS: 00000282 + RAX: 00000000ffffffa1 RBX: ffff888004959000 RCX: 00000000ffffdfff + RDX: 0000000000000000 RSI: 00000000ffffffea RDI: ffffc90000063b48 + RBP: ffffc90000063e28 R08: ffffffff82d39b28 R09: 0000000000009ffb + R10: 0000000000000175 R11: ffffffff82d09b40 R12: ffff8880049598e8 + R13: 0000000000000001 R14: dead000000000100 R15: ffffc90000045000 + FS: 0000000000000000(0000) GS:ffff888007a00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000000000d406b60 CR3: 000000000483e000 CR4: 00000000000006f0 + Call Trace: + + ? __warn+0x83/0x130 + ? unregister_netdevice_many_notify+0x8d9/0x930 + ? report_bug+0x18e/0x1a0 + ? handle_bug+0x54/0x90 + ? exc_invalid_op+0x18/0x70 + ? asm_exc_invalid_op+0x1a/0x20 + ? unregister_netdevice_many_notify+0x8d9/0x930 + ? bond_net_exit_batch_rtnl+0x5c/0x90 + cleanup_net+0x237/0x3d0 + process_one_work+0x163/0x390 + worker_thread+0x293/0x3b0 + ? __pfx_worker_thread+0x10/0x10 + kthread+0xec/0x1e0 + ? __pfx_kthread+0x10/0x10 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x2f/0x50 + ? __pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1a/0x30 + + ---[ end trace 0000000000000000 ]--- + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个CVE与namespace相关。描述中提到通过`ip netns add`创建了一个网络命名空间,并在其中执行了一系列操作,包括设置bond模式和安装XDP程序。这表明问题发生在Linux内核的网络命名空间(network namespace)机制中。 + +2. **这是什么程序的漏洞**: + 这是**Linux内核(Kernel)**的漏洞,具体涉及网络子系统中的bonding驱动程序。漏洞发生的原因是在删除网络命名空间时,`dev_xdp_uninstall()`函数被调用以卸载绑定设备上的XDP程序,而`bond_xdp_set()`会检查bond模式。如果在安装XDP程序后更改了bond模式(例如从`balance-rr`改为`broadcast`),可能会触发警告,因为某些bond模式(如`broadcast`)不支持原生XDP。 + +3. 
**漏洞效果**: + 该漏洞可能导致内核发出警告(WARNING),并可能引发进一步的不稳定行为,例如崩溃或数据处理异常。虽然这不是一个直接的权限提升或逃逸漏洞,但它暴露了内核在网络命名空间和XDP程序管理方面的潜在问题。对于使用容器化技术(如Docker或Kubernetes)的环境,这种漏洞可能会影响依赖网络命名空间隔离的工作负载稳定性。 + +cve: ./data/2025/22xxx/CVE-2025-22121.json +In the Linux kernel, the following vulnerability has been resolved: + +ext4: fix out-of-bound read in ext4_xattr_inode_dec_ref_all() + +There's issue as follows: +BUG: KASAN: use-after-free in ext4_xattr_inode_dec_ref_all+0x6ff/0x790 +Read of size 4 at addr ffff88807b003000 by task syz-executor.0/15172 + +CPU: 3 PID: 15172 Comm: syz-executor.0 +Call Trace: + __dump_stack lib/dump_stack.c:82 [inline] + dump_stack+0xbe/0xfd lib/dump_stack.c:123 + print_address_description.constprop.0+0x1e/0x280 mm/kasan/report.c:400 + __kasan_report.cold+0x6c/0x84 mm/kasan/report.c:560 + kasan_report+0x3a/0x50 mm/kasan/report.c:585 + ext4_xattr_inode_dec_ref_all+0x6ff/0x790 fs/ext4/xattr.c:1137 + ext4_xattr_delete_inode+0x4c7/0xda0 fs/ext4/xattr.c:2896 + ext4_evict_inode+0xb3b/0x1670 fs/ext4/inode.c:323 + evict+0x39f/0x880 fs/inode.c:622 + iput_final fs/inode.c:1746 [inline] + iput fs/inode.c:1772 [inline] + iput+0x525/0x6c0 fs/inode.c:1758 + ext4_orphan_cleanup fs/ext4/super.c:3298 [inline] + ext4_fill_super+0x8c57/0xba40 fs/ext4/super.c:5300 + mount_bdev+0x355/0x410 fs/super.c:1446 + legacy_get_tree+0xfe/0x220 fs/fs_context.c:611 + vfs_get_tree+0x8d/0x2f0 fs/super.c:1576 + do_new_mount fs/namespace.c:2983 [inline] + path_mount+0x119a/0x1ad0 fs/namespace.c:3316 + do_mount+0xfc/0x110 fs/namespace.c:3329 + __do_sys_mount fs/namespace.c:3540 [inline] + __se_sys_mount+0x219/0x2e0 fs/namespace.c:3514 + do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x67/0xd1 + +Memory state around the buggy address: + ffff88807b002f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffff88807b002f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +>ffff88807b003000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ^ + ffff88807b003080: ff ff ff ff 
ff ff ff ff ff ff ff ff ff ff ff ff + ffff88807b003100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + +The above issue happens because ext4_xattr_delete_inode() doesn't check whether the xattr +is valid when the xattr is in the inode. +To solve the above issue, call xattr_check_inode() to check whether the xattr is valid +in the inode. In fact, we can directly verify in ext4_iget_extra_inode(), +so that there is no divergent verification. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + N/A + +2. **程序漏洞分析**: + 这是一个**Linux内核**的漏洞,具体发生在`ext4`文件系统模块中。漏洞是由于在`ext4_xattr_inode_dec_ref_all()`函数中存在越界读取(out-of-bound read)问题,KASAN将其报告为释放后使用(use-after-free)。该问题可能引发内存损坏或系统崩溃,从而影响系统的稳定性。 + + 漏洞发生的原因是`ext4_xattr_delete_inode()`函数没有正确检查扩展属性(xattr)是否有效,如果扩展属性存储在inode中,则可能导致越界访问无效的内存。修复方法是通过调用`xattr_check_inode()`来验证扩展属性的有效性,或者直接在`ext4_iget_extra_inode()`中进行验证,以避免多次验证的不一致性。 + + 该漏洞的效果包括可能导致内核崩溃、数据丢失或被恶意利用以执行提权攻击。 + +3. **总结**: + N/A + +cve: ./data/2025/27xxx/CVE-2025-27519.json +Cognita is a RAG (Retrieval Augmented Generation) Framework for building modular, open source applications for production by TrueFoundry. A path traversal issue exists at /v1/internal/upload-to-local-directory which is enabled when the Local env variable is set to true, such as when Cognita is setup using Docker. Because the docker environment sets up the backend uvicorn server with auto reload enabled, when an attacker overwrites the /app/backend/__init__.py file, the file will automatically be reloaded and executed. This allows an attacker to get remote code execution in the context of the Docker container. This vulnerability is fixed in commit a78bd065e05a1b30a53a3386cc02e08c317d2243. + +analysis: 1. **分析是否与 namespace、cgroup、container 或者容器、隔离相关**: + 是的,该 CVE 与容器(Docker)相关。漏洞利用了 Docker 环境中文件被覆盖后自动重载的特性,从而导致远程代码执行。 + +2. 
**程序漏洞分析**: + - **程序类型**:这是容器内部运行的应用程序漏洞,具体是 Cognita 框架的漏洞。 + - **漏洞发生原因**:当 `Local` 环境变量设置为 `true` 时,Cognita 的 `/v1/internal/upload-to-local-directory` 接口存在路径遍历问题。在 Docker 环境下,uvicorn 后端服务器启用了自动重载功能。攻击者可以通过路径遍历覆盖 `/app/backend/__init__.py` 文件,由于自动重载机制,覆盖的文件会被重新加载并执行,从而导致远程代码执行。 + - **效果**:攻击者可以在 Docker 容器的上下文中实现远程代码执行,可能进一步影响容器内的数据或服务,甚至尝试逃逸容器以危害宿主机。 + +总结:这是一个容器内部运行的应用程序漏洞,与容器环境配置和自动重载机制有关,可能导致远程代码执行。 + +cve: ./data/2025/2xxx/CVE-2025-2312.json +A flaw was found in cifs-utils. When trying to obtain Kerberos credentials, the cifs.upcall program from the cifs-utils package makes an upcall to the wrong namespace in containerized environments. This issue may lead to disclosing sensitive data from the host's Kerberos credentials cache. + +analysis: 1. **分析是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与namespace、container和隔离相关。问题发生在容器化环境中,涉及到错误的命名空间(namespace)使用。 + +2. **程序漏洞分析**: + - **程序**:这是 `cifs-utils` 包中的漏洞,具体是 `cifs.upcall` 程序的问题。 + - **漏洞原因**:在容器化环境中,`cifs.upcall` 程序尝试获取 Kerberos 凭证时,错误地向主机的命名空间(而非容器的命名空间)发起了 upcall 请求。 + - **漏洞效果**:由于错误的 upcall,可能导致敏感数据(如主机的 Kerberos 凭证缓存内容)被泄露到容器中,从而破坏了容器与主机之间的隔离性。 + +cve: ./data/2025/30xxx/CVE-2025-30162.json +Cilium is a networking, observability, and security solution with an eBPF-based dataplane. For Cilium users who use Gateway API for Ingress for some services and use LB-IPAM or BGP for LB Service implementation and use network policies to block egress traffic from workloads in a namespace to workloads in other namespaces, egress traffic from workloads covered by such network policies to LoadBalancers configured by `Gateway` resources will incorrectly be allowed. LoadBalancer resources not deployed via a Gateway API configuration are not affected by this issue. This issue affects: Cilium v1.15 between v1.15.0 and v1.15.14 inclusive, v1.16 between v1.16.0 and v1.16.7 inclusive, and v1.17 between v1.17.0 and v1.17.1 inclusive. This issue is fixed in Cilium v1.15.15, v1.16.8, and v1.17.2. 
A Clusterwide Cilium Network Policy can be used to work around this issue for users who are unable to upgrade. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个CVE与namespace和容器隔离相关。问题涉及网络策略对工作负载(workloads)在不同namespace之间的出站流量的隔离控制。 + +2. **漏洞分析**: + - **程序**:这是Cilium的漏洞,Cilium是一个基于eBPF的容器网络解决方案,主要用于提供网络连接、可观测性和安全性。 + - **漏洞发生原因**:当用户使用Gateway API配置某些服务的入口(Ingress),并同时使用LB-IPAM或BGP实现负载均衡器(LoadBalancer)服务时,如果还通过网络策略(Network Policies)阻止某个namespace中的工作负载到其他namespace中工作负载的出站流量,则这些策略未能正确阻止流向由`Gateway`资源配置的负载均衡器的流量。 + - **效果**:本应被网络策略阻止的出站流量被错误地允许了,这可能导致流量绕过预期的隔离规则,从而削弱容器和namespace之间的网络隔离性,增加潜在的安全风险。 + +总结:这是一个与Cilium相关的容器网络隔离漏洞,影响特定版本的Cilium,可能导致namespace间的网络隔离失效。 + +cve: ./data/2025/32xxx/CVE-2025-32754.json +In jenkins/ssh-agent Docker images 6.11.1 and earlier, SSH host keys are generated on image creation for images based on Debian, causing all containers based on images of the same version use the same SSH host keys, allowing attackers able to insert themselves into the network path between the SSH client (typically the Jenkins controller) and SSH build agent to impersonate the latter. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,该CVE与容器和隔离相关。 + +2. 
**漏洞分析**: + - **程序**:这是 Jenkins 的 `ssh-agent` Docker 镜像中的漏洞。 + - **漏洞发生原因**:在基于 Debian 的 Jenkins/ssh-agent Docker 镜像创建过程中,SSH 主机密钥是在镜像构建时生成的。由于这些密钥是静态嵌入到镜像中的,因此所有基于同一版本镜像运行的容器都会共享相同的 SSH 主机密钥。 + - **漏洞效果**:这种共享的 SSH 主机密钥会导致安全风险。攻击者可以通过网络插入(例如中间人攻击),冒充 Jenkins 构建代理(SSH build agent),从而破坏通信的完整性和身份验证机制。这会削弱容器之间的隔离性,因为不同的容器实例本应具有独立的身份标识(不同的 SSH 密钥),但此处却因共享密钥而失去了这种隔离性。 + +cve: ./data/2025/32xxx/CVE-2025-32755.json +In jenkins/ssh-slave Docker images based on Debian, SSH host keys are generated on image creation for images based on Debian, causing all containers based on images of the same version use the same SSH host keys, allowing attackers able to insert themselves into the network path between the SSH client (typically the Jenkins controller) and SSH build agent to impersonate the latter. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个 CVE 与容器和隔离相关。 + +2. **程序漏洞分析**: + - **程序**:这是 Jenkins 的 `ssh-slave` Docker 镜像中的漏洞。 + - **漏洞发生原因**:在基于 Debian 的 `jenkins/ssh-slave` Docker 镜像构建过程中,SSH 主机密钥是在镜像创建时生成的。因此,所有基于同一版本镜像启动的容器都会共享相同的 SSH 主机密钥。 + - **效果**:由于多个容器共享相同的 SSH 主机密钥,攻击者可以通过网络劫持(如中间人攻击)冒充 SSH 构建代理,欺骗 Jenkins 控制器,从而破坏容器之间的隔离性,并可能进一步威胁到整个 Jenkins 构建环境的安全性。 + +cve: ./data/2025/32xxx/CVE-2025-32955.json +Harden-Runner is a CI/CD security agent that works like an EDR for GitHub Actions runners. Versions from 0.12.0 to before 2.12.0 are vulnerable to `disable-sudo` bypass. Harden-Runner includes a policy option `disable-sudo` to prevent the GitHub Actions runner user from using sudo. This is implemented by removing the runner user from the sudoers file. However, this control can be bypassed as the runner user, being part of the docker group, can interact with the Docker daemon to launch privileged containers or access the host filesystem. This allows the attacker to regain root access or restore the sudoers file, effectively bypassing the restriction. This issue has been patched in version 2.12.0. + +analysis: 1. 
**是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,该 CVE 与容器和隔离相关。描述中提到 Harden-Runner 的 `disable-sudo` 功能可以通过 Docker 守护程序被绕过,而 Docker 是一种容器技术,攻击者可以利用它启动特权容器或访问主机文件系统。 + +2. **这是什么程序的漏洞:** + - 漏洞存在于 **Harden-Runner** 程序中。 + - Harden-Runner 是一个用于 GitHub Actions 运行器的安全代理工具。 + - 漏洞发生的原因是 Harden-Runner 的 `disable-sudo` 功能实现不完善。虽然它通过移除 runner 用户的 sudo 权限来防止其使用 sudo,但由于 runner 用户属于 docker 组,仍然可以与 Docker 守护程序交互,从而启动特权容器或访问主机文件系统。 + - **效果:** 攻击者能够绕过 `disable-sudo` 限制,重新获得 root 权限或恢复 sudoers 文件,导致安全控制失效。 + +总结:此漏洞与容器隔离机制相关,因为攻击者利用了 Docker 容器的权限绕过了安全策略。 + +cve: ./data/2025/3xxx/CVE-2025-3047.json +When running the AWS Serverless Application Model Command Line Interface (SAM CLI) build process with Docker and symlinks are included in the build files, the container environment allows a user to access privileged files on the host by leveraging the elevated permissions granted to the tool. A user could leverage the elevated permissions to access restricted files via symlinks and copy them to a more permissive location on the container. + +Users should upgrade to v1.133.0 or newer and ensure any forked or derivative code is patched to incorporate the new fixes. + +analysis: 1. **是否与namespace、cgroup、container或者容器、隔离相关**: + 是的,这个 CVE 与容器和隔离相关。它涉及到在使用 Docker 构建 AWS SAM CLI 时,由于 symlinks 的存在,导致容器内的用户可以访问主机上的特权文件,破坏了容器与主机之间的隔离。 + +2. 
**漏洞所属程序及影响分析**: + - **程序**:这是 AWS Serverless Application Model (SAM) CLI 的漏洞,具体发生在其使用 Docker 进行构建的过程中。 + - **漏洞发生原因**:当 SAM CLI 使用 Docker 构建应用程序时,如果构建文件中包含符号链接(symlinks),容器内的进程可以通过这些符号链接访问主机上的特权文件。这是因为 SAM CLI 在容器内运行时被赋予了较高的权限,而没有正确限制对主机文件系统的访问。 + - **效果**:攻击者可以利用此漏洞通过容器访问主机上的敏感文件,并将这些文件复制到容器内更宽松的位置,从而泄露主机上的机密信息或进一步扩大攻击范围。这破坏了容器与主机之间的隔离机制,可能导致数据泄露或其他安全问题。 + +总结:该 CVE 与容器隔离相关,是 AWS SAM CLI 在使用 Docker 构建时的一个漏洞,可能导致主机文件系统被访问和数据泄露。 + +cve: ./data/2025/3xxx/CVE-2025-3048.json +After completing a build with AWS Serverless Application Model Command Line Interface (SAM CLI) which include symlinks, the content of those symlinks are copied to the cache of the local workspace as regular files or directories. As a result, a user who does not have access to those symlinks outside of the Docker container would now have access via the local workspace. + +Users should upgrade to version 1.134.0 and ensure any forked or derivative code is patched to incorporate the new fixes. After upgrading, users must re-build their applications using the sam build --use-container to update the symlinks. + +analysis: 1. **是否与 namespace、cgroup、container 或者容器、隔离相关:** + 是的,此 CVE 与容器相关。描述中明确提到 Docker 容器,并且问题涉及容器内外的文件访问权限。 + +2. **程序漏洞分析:** + - **程序:** AWS Serverless Application Model Command Line Interface (SAM CLI) + - **漏洞发生原因:** 在使用 SAM CLI 构建应用程序时,如果构建过程中包含符号链接(symlinks),这些符号链接的内容会被复制到本地工作区的缓存中,并且被转换为普通文件或目录。 + - **效果:** 由于符号链接的内容在容器内被复制并暴露在本地工作区,原本无法访问这些符号链接内容的用户现在可以通过本地工作区访问它们。这破坏了容器的隔离性,导致敏感数据可能被未经授权的用户访问。 + +3. **结论:** + - 此漏洞与容器隔离性相关,因为它允许用户通过容器内的操作绕过正常的权限限制,访问原本受限的文件内容。 +