Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Assuming you have

  • Pandas (variables named *_df are DataFrame objects in the following), which can accept nested JSON data

  • requests (library) is being used

...

Based on that, the following Python 3 code exemplifies how to generate a Software Bill of Materials (SBOM) for a GitHub Enterprise repository with GitHub Advanced Security enabled.

Info

The set_score function is explained separately. It generates basic metrics.

Code Block
languagepy
def _sbom_status_result(score, status, message, trailing_blank=False):
    """Print a Dependabot status line and return ``(score, one-row status DataFrame)``."""
    print("Dependabot Status: " + message)
    if trailing_blank:
        print()
    return score, pd.DataFrame([{"Status": status}])


def get_sbom_issues_score(hed=None, graphql_url="", verbose=False, repo="", org=""):
    """
    Ask the GitHub GraphQL API for the Dependabot findings of a repository and score them.

    Only the first 100 alerts are requested (``vulnerabilityAlerts(first: 100)``);
    the result is not paginated.

    :param hed: dict, HTTP headers with the auth data (handed to ``requests.post``)
    :param graphql_url: string, GraphQL endpoint
    :param verbose: boolean, also print the open findings and their severity counts
    :param repo: string, repository name
    :param org: string, org name
    :return: tuple ``(sbom_score, table)``:
        ``(100, DataFrame Status=Disabled)`` if alerts are disabled or the API answers 403,
        ``(0, DataFrame Status=No findings)`` if no alerts are reported,
        otherwise the value returned by ``set_score`` and a DataFrame with the columns
        ``Risk`` and ``Dependency Findings Reported`` (open findings per severity)
    :raises ValueError: if ``repo`` or ``org`` is empty
    """
    if not repo or not org:
        raise ValueError("repo and org must be non-empty strings")

    # repo/org are passed as GraphQL variables instead of being pasted into the
    # query text, so their content can never escape the string literal context
    sbom_query = """
    query($repo: String!, $org: String!) {
      repository(name: $repo, owner: $org) {
        vulnerabilityAlerts(first: 100) {
          nodes {
            createdAt
            dismissedAt
            state
            dismissReason
            securityVulnerability {
              package {
                name
              }
              severity
              advisory {
                description
              }
            }
          }
        }
      }
    }
    """

    dbot_enabled_query = """
    query($repo: String!, $org: String!) {
      repository(name: $repo, owner: $org) {
        id
        hasVulnerabilityAlertsEnabled
      }
    }
    """

    variables = {"repo": repo, "org": org}

    print("Dependabot Repo: " + repo)

    # the disabled state is checked first: a repository without alerts enabled
    # reports no nodes and would otherwise be counted as "No findings"
    dbot_enabled_status = requests.post(
        graphql_url,
        headers=hed,
        json={"query": dbot_enabled_query, "variables": variables},
    )
    if dbot_enabled_status.status_code == 403:
        return _sbom_status_result(100, "Disabled", "disabled for repo", trailing_blank=True)

    dbot_status_df = pd.json_normalize(dbot_enabled_status.json()["data"])
    dbot_status = dbot_status_df["repository.hasVulnerabilityAlertsEnabled"].iloc[0]
    if not dbot_status:
        return _sbom_status_result(100, "Disabled", "disabled for repo", trailing_blank=True)

    response_dp = requests.post(
        graphql_url,
        headers=hed,
        json={"query": sbom_query, "variables": variables},
    )
    if response_dp.status_code == 403:
        return _sbom_status_result(100, "Disabled", "disabled for repo", trailing_blank=True)

    # json_normalize flattens the nested reply; the alert list stays a single cell
    deps_df = pd.json_normalize(response_dp.json()["data"])
    alert_nodes = deps_df["repository.vulnerabilityAlerts.nodes"].iloc[0]

    if not alert_nodes:
        return _sbom_status_result(0, "No findings", "no findings for repo")

    print("Dependabot Status: " + "processing findings for repo")

    # one row per alert: its state plus the flattened securityVulnerability object
    alerts_df = pd.DataFrame(alert_nodes)
    vulnerabilities_df = pd.json_normalize(
        [node["securityVulnerability"] for node in alert_nodes]
    )
    findings_df = pd.concat([alerts_df["state"], vulnerabilities_df], axis=1)

    # the data is framed from JSON, so normalize the types
    findings_df["state"] = findings_df["state"].astype(str)
    findings_df["severity"] = findings_df["severity"].str.lower()

    # flattened column names contain dots (e.g. package.name); use underscores instead
    findings_df.columns = [str(col).replace(".", "_") for col in findings_df.columns]

    # keep only findings that have not been treated (fixed or dismissed) yet
    open_findings_df = findings_df[findings_df["state"] == "OPEN"]
    severity_counts = open_findings_df["severity"].value_counts()

    if verbose:
        print("Software Components Issue List (open)")
        print(open_findings_df)
        print(severity_counts)
        print("Software Components Severity Score (open)")

    sbom_score = set_score(severity_df=severity_counts)

    # for better table style
    severity_table = severity_counts.reset_index()
    severity_table.columns = ["Risk", "Dependency Findings Reported"]

    return sbom_score, severity_table

  • An equivalent REST endpoint doesn’t seem to exist (last time I checked)

  • This is equivalent for GH Cloud and on-premises Server variants

Use these results in an SSDLC

Secure Software Development Lifecycle

Here is how basic metrics can be generated:

...