Files
cve/general_ai_analysis.py
sleepwithoutbz 79ae3fec86 Init commit.
2025-05-27 15:15:24 +08:00

80 lines
3.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from os import truncate
import re
import json
from openai import OpenAI
from typing import Dict, List, Any
def process_result(data: Any) -> str:
    """Extract the assistant reply from a decoded chat-completion response.

    Walks the path ``choices -> 0 -> message -> content`` through nested
    dicts and lists. A path step is used as an integer index when the current
    node is a list, and as a dictionary key otherwise.

    Args:
        data: The decoded response (nested dicts/lists).

    Returns:
        The value stored at the path, or ``"N/A"`` when any step is missing,
        is ``None``, or the structure does not have the expected shape.
    """
    path = ("choices", "0", "message", "content")
    node = data
    try:
        for step in path:
            if isinstance(node, list):
                node = node[int(step)]
            else:
                node = node.get(step)
            if node is None:
                # A missing key or an explicit null: report the sentinel
                # rather than leaking None to callers that compare to "N/A".
                return "N/A"
    except (KeyError, IndexError, TypeError, AttributeError, ValueError):
        # AttributeError: node is neither a dict nor a list (no .get()).
        # ValueError: node is a list but the step is not a numeric index.
        return "N/A"
    return node
def analyze_with_ai(data: str) -> str:
    """Ask the LLM whether a CVE entry is relevant to container isolation.

    The system prompt asks the model to judge whether the CVE relates to
    namespaces, cgroups, containers or isolation, and whether it could let a
    running container (e.g. Docker) affect other containers or the host. The
    model is told to answer exactly "N/A" when neither applies.

    The API key is read from the ``DASHSCOPE_API_KEY`` environment variable so
    that no credential is stored in the source tree.

    Args:
        data: CVE text; the first line carries the CVE id and file location,
            the following lines the description.

    Returns:
        The model's answer with surrounding whitespace removed, or ``"N/A"``
        when the key is not configured, the request fails, or the response
        carries no text.
    """
    # Local import: the module header only has `from os import truncate`.
    import os

    prompt = """
下面我会发给你一段文本第一行是CVE的编号与文件位置信息后续是这个CVE的详细描述。
请分析我发送给你的CVE信息并进行分析我有三个分析要求
1. 分析这个CVE信息是否与namespace、cgroup、container或者容器、隔离相关
2. 分析这个CVE是否会导致容器例如Docker等在运行时会影响其他容器或宿主机
3. 如果条件1、2全部不满足直接返回“N/A”不需要附带任何信息。
"""

    api_key = os.environ.get("DASHSCOPE_API_KEY")
    if not api_key:
        # Same reporting style as a failed request, so callers keep seeing
        # the "N/A" sentinel instead of an unexpected exception.
        print("调用大模型失败: 未设置环境变量 DASHSCOPE_API_KEY")
        return "N/A"

    try:
        client = OpenAI(
            api_key=api_key,
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
        )
        completion = client.chat.completions.create(
            # Model list: https://help.aliyun.com/zh/model-studio/getting-started/models
            model="qwen-plus",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": data},
            ],
            # Qwen3 models use the enable_thinking parameter to control the
            # thinking process (open-source default True, commercial False).
            # With an open-source Qwen3 model and no streaming output,
            # uncomment the next line, otherwise the request errors out.
            # extra_body={"enable_thinking": False},
        )
        answer = process_result(json.loads(completion.model_dump_json()))
    except Exception as e:
        # Best-effort by design: one failed request must not abort a batch.
        print(f"调用大模型失败: {str(e)}")
        return "N/A"

    if not isinstance(answer, str):
        return "N/A"
    # Strip so that a reply such as "N/A\n" still equals the sentinel.
    return answer.strip() or "N/A"
def _split_cve_blocks(text: str) -> List[str]:
    """Split the keyword-filter log into one stripped text block per CVE path.

    A block starts at each match of the CVE JSON path pattern and runs up to
    the start of the next match, or to the end of the text for the last one.
    Returns an empty list when the pattern never matches.
    """
    starts = [m.start() for m in re.finditer(r"\./data/.*x/CVE.*?\.json", text)]
    ends = starts[1:] + [len(text)]
    return [text[begin:end].strip() for begin, end in zip(starts, ends)]


def main() -> None:
    """Analyze every CVE block in the input log and record the relevant ones.

    Reads ``keyword_filter_11-25.log``, sends each CVE block to
    ``analyze_with_ai`` and appends every answer other than ``"N/A"`` to
    ``analysis_result.log``.
    """
    with open("keyword_filter_11-25.log", "r", encoding="utf-8") as file:
        text = file.read()

    blocks = _split_cve_blocks(text)

    # The output file is created (and truncated) even when nothing matches.
    # The context manager guarantees it is flushed and closed, including when
    # an exception interrupts the loop.
    with open("analysis_result.log", "w", encoding="utf-8") as log:
        if not blocks:
            print("No match found.")
            return
        for block in blocks:
            ans = analyze_with_ai(block)
            if ans == "N/A":
                continue
            log.write(f"cve: {block}, analysis: {ans}\n\n")


if __name__ == "__main__":
    main()