...
Assuming that:
- Pandas is available (`response_dp` holds the API response and the `*_df` variables are DataFrame objects in the following); it accepts nested JSON data, so API responses can be fed to it directly
- requests (library) is being used for the HTTP calls
...
Based on that, the following Python 3 code exemplifies how to generate a Software Bill Of Materials (SBOM) for a GitHub Advanced Enterprise Security enabled repository.
Info |
---|
The |
Code Block | ||
---|---|---|
| ||
def get_sbom_issues_score(hed=dict, graphql_url="", verbose=False, repo="", org=""):
    """
    Ask GitHub Sec API for data about the Dependabot findings and analyze it.

    Two GraphQL queries are posted to ``graphql_url``: one reading
    ``hasVulnerabilityAlertsEnabled`` for the repository and one fetching the
    first 100 ``vulnerabilityAlerts`` nodes. Open alerts are counted per
    severity and the counts are handed to ``set_score``.

    :param hed: dict, auth data (sent as HTTP headers). NOTE: the default is
        the ``dict`` type itself, not an instance, so callers are expected to
        always pass real headers.
    :param graphql_url: GraphQL endpoint
    :param verbose: boolean, flag
    :param repo: string, repository name
    :param org: string, org name
    :return: sbom_score (int), sbom_severity_list (DataFrame statistical object).
        ``(0, <"No findings" frame>)`` when there are no alerts and
        ``(100, <"Disabled" frame>)`` when alerts are disabled or the API
        answers with HTTP 403.
    :raises ValueError: if ``repo`` or ``org`` contains a quote or backslash
    :raises RuntimeError: if the alert query returns an HTTP status other
        than 200 or 403
    """
    from string import Template

    # this is the GraphQL query for the API
    query_template_sbom = """
    {
      repository(name: "$repo", owner: "$org") {
        vulnerabilityAlerts(first: 100) {
          nodes {
            createdAt
            dismissedAt
            state
            dismissReason
            securityVulnerability {
              package {
                name
              }
              severity
              advisory {
                description
              }
            }
          }
        }
      }
    }
    """

    query_template_depbot_enabled = """
    {
      repository(name: "$repo", owner: "$org") {
        id
        hasVulnerabilityAlertsEnabled
      }
    }
    """

    def _disabled_result():
        # Shared result for every "Dependabot is not usable here" outcome.
        status_df = pd.DataFrame([{"Status": "Disabled"}])
        print("Dependabot Status: " + "disabled for repo", file=sys.stdout)
        print()
        return 100, status_df

    print("Dependabot Repo: " + repo, file=sys.stdout)

    # prevent escaping the literal context of the graphql template.
    # The previous check (`"'" or '"' not in ...`) was always true because a
    # non-empty string literal is truthy, so it never rejected anything.
    if any(char in repo + org for char in ("'", '"', "\\")):
        raise ValueError("repo and org must not contain quotes or backslashes")

    substitutions = {'repo': repo, 'org': org}
    sbom_query = Template(query_template_sbom).substitute(substitutions)
    dbot_enabled_query = Template(query_template_depbot_enabled).substitute(substitutions)

    dbot_enabled_status = requests.post(graphql_url, headers=hed, json={'query': dbot_enabled_query})
    parsed_dbot_status_rply = dbot_enabled_status.json()["data"]
    dbot_status_df = pd.json_normalize(parsed_dbot_status_rply)
    dbot_status = dbot_status_df["repository.hasVulnerabilityAlertsEnabled"].iloc[0]

    # handle disabled state before looking at findings: otherwise an empty
    # alert list would be reported as "No findings" (score 0) even though
    # alerts are switched off.
    if not dbot_status:
        return _disabled_result()

    response_dp = requests.post(graphql_url, headers=hed, json={'query': sbom_query})
    if response_dp.status_code == 403:
        # checked before parsing, the body of a 403 is not a GraphQL result
        return _disabled_result()

    parsed_dp = response_dp.json()["data"]
    df_deps = pd.json_normalize(parsed_dp)

    # we need to rename the columns because dots with table headers cannot get handled correctly
    df_deps.columns = df_deps.columns.map(lambda x: x.replace('.', '_') if isinstance(x, str) else x)

    # a sub-section of the flattened JSON gets extracted
    sub_json = df_deps['repository_vulnerabilityAlerts_nodes'][0]

    if response_dp.status_code != 200:
        # previously this path fell off the end of the function and returned None
        raise RuntimeError(
            "Unexpected HTTP status from the GraphQL API: " + str(response_dp.status_code)
        )

    # needed in case there are 0 issues and the HTTP status code is ok
    if len(sub_json) == 0:
        status_df = pd.DataFrame([{"Status": "No findings"}])
        print("Dependabot Status: " + "no findings for repo", file=sys.stdout)
        return 0, status_df

    print("Dependabot Status: " + "processing findings for repo", file=sys.stdout)

    # data with the findings needs to be re-framed
    dependabot_data = pd.DataFrame(sub_json)

    # data needs to be flattened again
    dependabot_issues = pd.json_normalize(pd.DataFrame.from_records(sub_json)["securityVulnerability"])

    # since the data is flattened and framed from JSON we need to normalize the types
    dependabot_df = pd.concat([dependabot_data["state"], dependabot_issues], axis=1)
    dependabot_df["state"] = dependabot_df["state"].astype(str)
    dependabot_df["severity"] = dependabot_df["severity"].str.lower()

    # column renamed again for this dataframe
    dependabot_df.columns = dependabot_df.columns.map(
        lambda x: x.replace('.', '_') if isinstance(x, str) else x
    )

    # filter out anything that's not been treated (marked as dismissed in API)
    dependabot_severity_open_list = dependabot_df[dependabot_df['state'] == 'OPEN']
    print(dependabot_severity_open_list)
    if verbose:
        print("Software Components Issue List (open)")

    dependabot_severity_list = dependabot_severity_open_list["severity"].value_counts()
    if verbose:
        print(dependabot_severity_list)
        print("Software Components Severity Score (open)")

    sbom_score = set_score(severity_df=dependabot_severity_list)

    # for better table style
    dependabot_severity_list = dependabot_severity_list.reset_index()
    dependabot_severity_list.columns = ['Risk', 'Dependency Findings Reported']

    return sbom_score, dependabot_severity_list
An equivalent REST endpoint doesn’t seem to exist (last time I checked).
This is equivalent for GH Cloud and on-premises Server variants
Use these results in an SSDLC
Secure Software Development Lifecycle
Here is how basic metrics can be generated:
...