From b4e57bba59bb721cf0168c5e0bd357164ce5852a Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Sat, 13 Feb 2021 04:25:52 -0500 Subject: [PATCH 01/24] Update repository references Replace skeleton-docker with vdp-scanner-docker and update the Lineage parent repository. --- .github/lineage.yml | 2 +- CONTRIBUTING.md | 10 +++++----- README.md | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/lineage.yml b/.github/lineage.yml index 8dfc20b..b10c80c 100644 --- a/.github/lineage.yml +++ b/.github/lineage.yml @@ -3,4 +3,4 @@ version: "1" lineage: skeleton: - remote-url: https://github.com/cisagov/skeleton-generic.git + remote-url: https://github.com/cisagov/skeleton-docker.git diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4222005..fb777dc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ all of which should be in this repository. If you want to report a bug or request a new feature, the most direct method is to [create an -issue](https://github.com/cisagov/skeleton-docker/issues) in this +issue](https://github.com/cisagov/vdp-scanner-docker/issues) in this repository. We recommend that you first search through existing issues (both open and closed) to check if your particular issue has already been reported. If it has then you might want to add a comment @@ -25,7 +25,7 @@ one. ## Pull requests ## If you choose to [submit a pull -request](https://github.com/cisagov/skeleton-docker/pulls), you will +request](https://github.com/cisagov/vdp-scanner-docker/pulls), you will notice that our continuous integration (CI) system runs a fairly extensive set of linters and syntax checkers. Your pull request may fail these checks, and that's OK. 
If you want you can stop there and @@ -111,9 +111,9 @@ can create and configure the Python virtual environment with these commands: ```console -cd skeleton-docker -pyenv virtualenv skeleton-docker -pyenv local skeleton-docker +cd vdp-scanner-docker +pyenv virtualenv vdp-scanner-docker +pyenv local vdp-scanner-docker pip install --requirement requirements-dev.txt ``` diff --git a/README.md b/README.md index 99246a9..f19c319 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ -# skeleton-docker 💀🐳 # +# vdp-scanner-docker 💀🐳 # -[![GitHub Build Status](https://github.com/cisagov/skeleton-docker/workflows/build/badge.svg)](https://github.com/cisagov/skeleton-docker/actions) -[![Total alerts](https://img.shields.io/lgtm/alerts/g/cisagov/skeleton-docker.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/skeleton-docker/alerts/) -[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/cisagov/skeleton-docker.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/skeleton-docker/context:python) +[![GitHub Build Status](https://github.com/cisagov/vdp-scanner-docker/workflows/build/badge.svg)](https://github.com/cisagov/vdp-scanner-docker/actions) +[![Total alerts](https://img.shields.io/lgtm/alerts/g/cisagov/vdp-scanner-docker.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/vdp-scanner-docker/alerts/) +[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/cisagov/vdp-scanner-docker.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/vdp-scanner-docker/context:python) ## Docker Image ## [![Docker Pulls](https://img.shields.io/docker/pulls/cisagov/example)](https://hub.docker.com/r/cisagov/example) [![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/cisagov/example)](https://hub.docker.com/r/cisagov/example) 
-[![Platforms](https://img.shields.io/badge/platforms-amd64%20%7C%20arm%2Fv6%20%7C%20arm%2Fv7%20%7C%20arm64%20%7C%20ppc64le%20%7C%20s390x-blue)](https://hub.docker.com/r/cisagov/skeleton-docker/tags) +[![Platforms](https://img.shields.io/badge/platforms-amd64%20%7C%20arm%2Fv6%20%7C%20arm%2Fv7%20%7C%20arm64%20%7C%20ppc64le%20%7C%20s390x-blue)](https://hub.docker.com/r/cisagov/vdp-scanner-docker/tags) This is a docker skeleton project that can be used to quickly get a new [cisagov](https://github.com/cisagov) GitHub docker project @@ -27,8 +27,8 @@ Pull `cisagov/example` from the Docker repository: Or build `cisagov/example` from source: - git clone https://github.com/cisagov/skeleton-docker.git - cd skeleton-docker + git clone https://github.com/cisagov/vdp-scanner-docker.git + cd vdp-scanner-docker docker-compose build --build-arg VERSION=0.0.1 ### Run ### From 7402eaeb01e6717b43edda6955443fb4207a8b00 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Fri, 19 Feb 2021 02:05:33 -0500 Subject: [PATCH 02/24] Update project references Replace references to the example docker image with references to a vdp-scanner image. Remove elements that will not be used in this Docker image. 
--- .github/workflows/build.yml | 2 +- Dockerfile | 17 ++------------ README.md | 47 ++++++------------------------------- docker-compose.yml | 15 ++++-------- src/secrets/quote.txt | 1 - tests/conftest.py | 2 +- tests/container_test.py | 2 +- 7 files changed, 16 insertions(+), 70 deletions(-) delete mode 100644 src/secrets/quote.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ce70bf5..39c6da3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,7 +26,7 @@ on: env: BUILDX_CACHE_DIR: ~/.cache/buildx - IMAGE_NAME: cisagov/example + IMAGE_NAME: cisagov/vdp-scanner PIP_CACHE_DIR: ~/.cache/pip PLATFORMS: "linux/amd64,linux/arm/v6,linux/arm/v7,\ linux/arm64,linux/ppc64le,linux/s390x" diff --git a/Dockerfile b/Dockerfile index 8819053..b542450 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,9 @@ -ARG VERSION=unspecified - FROM python:3.9-alpine -ARG VERSION - # For a list of pre-defined annotation keys and value types see: # https://github.com/opencontainers/image-spec/blob/master/annotations.md # Note: Additional labels are added by the build workflow. 
-LABEL org.opencontainers.image.authors="mark.feldhousen@cisa.dhs.gov" +LABEL org.opencontainers.image.authors="nicholas.mcdonnell@cisa.dhs.gov" LABEL org.opencontainers.image.vendor="Cyber and Infrastructure Security Agency" ARG CISA_UID=421 @@ -24,15 +20,6 @@ py-pip WORKDIR ${CISA_HOME} -RUN wget -O sourcecode.tgz https://github.com/cisagov/skeleton-python-library/archive/v${VERSION}.tar.gz && \ - tar xzf sourcecode.tgz --strip-components=1 && \ - pip install --requirement requirements.txt && \ - ln -snf /run/secrets/quote.txt src/example/data/secret.txt && \ - rm sourcecode.tgz - USER cisa -EXPOSE 8080/TCP -VOLUME ["/var/log"] -ENTRYPOINT ["example"] -CMD ["--log-level", "DEBUG"] +ENTRYPOINT ["python", "vdp-scanner.py"] diff --git a/README.md b/README.md index f19c319..9e2840f 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ ## Docker Image ## -[![Docker Pulls](https://img.shields.io/docker/pulls/cisagov/example)](https://hub.docker.com/r/cisagov/example) -[![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/cisagov/example)](https://hub.docker.com/r/cisagov/example) -[![Platforms](https://img.shields.io/badge/platforms-amd64%20%7C%20arm%2Fv6%20%7C%20arm%2Fv7%20%7C%20arm64%20%7C%20ppc64le%20%7C%20s390x-blue)](https://hub.docker.com/r/cisagov/vdp-scanner-docker/tags) +[![Docker Pulls](https://img.shields.io/docker/pulls/cisagov/vdp-scanner-docker)](https://hub.docker.com/r/cisagov/vdp-scanner) +[![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/cisagov/vdp-scanner)](https://hub.docker.com/r/cisagov/vdp-scanner) +[![Platforms](https://img.shields.io/badge/platforms-amd64%20%7C%20arm%2Fv6%20%7C%20arm%2Fv7%20%7C%20arm64%20%7C%20ppc64le%20%7C%20s390x-blue)](https://hub.docker.com/r/cisagov/vdp-scanner/tags) This is a docker skeleton project that can be used to quickly get a new [cisagov](https://github.com/cisagov) GitHub docker project @@ -21,11 +21,11 @@ appropriate for docker containers and the major 
languages that we use. ### Install ### -Pull `cisagov/example` from the Docker repository: +Pull `cisagov/vdp-scanner` from the Docker repository: - docker pull cisagov/example + docker pull cisagov/vdp-scanner -Or build `cisagov/example` from source: +Or build `cisagov/vdp-scanner` from source: git clone https://github.com/cisagov/vdp-scanner-docker.git cd vdp-scanner-docker @@ -33,40 +33,7 @@ Or build `cisagov/example` from source: ### Run ### - docker-compose run --rm example - -## Ports ## - -This container exposes the following ports: - -| Port | Protocol | Service | -|-------|----------|----------| -| 8080 | TCP | http | - -## Environment Variables ## - -| Variable | Default Value | Purpose | -|---------------|-------------------------------|--------------| -| ECHO_MESSAGE | `Hello World from Dockerfile` | Text to echo | - -## Secrets ## - -| Filename | Purpose | -|---------------|----------------------| -| quote.txt | Secret text to echo | - -## Volumes ## - -| Mount point | Purpose | -|-------------|----------------| -| /var/log | logging output | - -## New Repositories from a Skeleton ## - -Please see our [Project Setup guide](https://github.com/cisagov/development-guide/tree/develop/project_setup) -for step-by-step instructions on how to start a new repository from -a skeleton. This will save you time and effort when configuring a -new repository! + docker-compose run --rm vdp-scanner ## Contributing ## diff --git a/docker-compose.yml b/docker-compose.yml index 6774387..8944f63 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,19 +3,15 @@ version: "3.7" # This docker-compose file is used to build and test the container -secrets: - quote_txt: - file: ./src/secrets/quote.txt - services: - example: + vdp-scanner: # Run the container normally build: # VERSION must be specified on the command line: # e.g., --build-arg VERSION=0.0.1 context: . 
dockerfile: Dockerfile - image: cisagov/example + image: cisagov/vdp-scanner init: true restart: "no" environment: @@ -25,13 +21,10 @@ services: published: "8080" protocol: tcp mode: host - secrets: - - source: quote_txt - target: quote.txt - example-version: + vdp-scanner-version: # Run the container to collect version information - image: cisagov/example + image: cisagov/vdp-scanner init: true restart: "no" command: --version diff --git a/src/secrets/quote.txt b/src/secrets/quote.txt deleted file mode 100644 index 93ee1a8..0000000 --- a/src/secrets/quote.txt +++ /dev/null @@ -1 +0,0 @@ -There are no secrets better kept than the secrets everybody guesses. diff --git a/tests/conftest.py b/tests/conftest.py index 28d6c42..4c4452a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ # Third-Party Libraries import pytest -MAIN_SERVICE_NAME = "example" +MAIN_SERVICE_NAME = "vdp-scanner" VERSION_SERVICE_NAME = f"{MAIN_SERVICE_NAME}-version" diff --git a/tests/container_test.py b/tests/container_test.py index 6153028..fa214fe 100644 --- a/tests/container_test.py +++ b/tests/container_test.py @@ -1,5 +1,5 @@ #!/usr/bin/env pytest -vs -"""Tests for example container.""" +"""Tests for vdp-scanner container.""" # Standard Python Libraries import os From 3def961688bf57b5af55e85acb7c4a5698acfc71 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Fri, 19 Feb 2021 02:34:27 -0500 Subject: [PATCH 03/24] Create framework for image functionality Create the framing of a vdp-scanner.py script that is the core functionality of this Docker image. Adjust the Dockerfile and docker-compose.yml files to reflect these changes. 
--- Dockerfile | 39 +++++++++++++++++++++++++++++-------- docker-compose.yml | 7 ------- src/requirements.txt | 2 ++ src/vdp-scanner.py | 46 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 15 deletions(-) create mode 100644 src/requirements.txt create mode 100644 src/vdp-scanner.py diff --git a/Dockerfile b/Dockerfile index b542450..70ed453 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,6 @@ -FROM python:3.9-alpine +ARG PY_VERSION=3.9 + +FROM python:${PY_VERSION}-slim AS compile-stage # For a list of pre-defined annotation keys and value types see: # https://github.com/opencontainers/image-spec/blob/master/annotations.md @@ -6,20 +8,41 @@ FROM python:3.9-alpine LABEL org.opencontainers.image.authors="nicholas.mcdonnell@cisa.dhs.gov" LABEL org.opencontainers.image.vendor="Cyber and Infrastructure Security Agency" +ENV PY_VENV=/opt/venv + +RUN python -m venv --system-site-packages ${PY_VENV} +ENV PATH="${PY_VENV}/bin:$PATH" +RUN python -m pip install --no-cache-dir \ + pip==21.0.1 \ + setuptools==53.0.0 \ + wheel==0.36.2 + +COPY src/requirements.txt requirements.txt +RUN python -m pip install --no-cache-dir --requirement requirements.txt + +FROM python:${PY_VERSION}-slim AS build-stage + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates=20200601~deb10u2 \ + openssl=1.1.1d-0+deb10u4 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ENV PY_VENV=/opt/venv +COPY --from=compile-stage ${PY_VENV} ${PY_VENV} +ENV PATH="${PY_VENV}/bin:$PATH" + ARG CISA_UID=421 ENV CISA_HOME="/home/cisa" -ENV ECHO_MESSAGE="Hello World from Dockerfile" RUN addgroup --system --gid ${CISA_UID} cisa \ && adduser --system --uid ${CISA_UID} --ingroup cisa cisa -RUN apk --update --no-cache add \ -ca-certificates \ -openssl \ -py-pip - WORKDIR ${CISA_HOME} - USER cisa +COPY src/version.txt version.txt +COPY src/vdp-scanner.py vdp-scanner.py + ENTRYPOINT ["python", "vdp-scanner.py"] diff --git a/docker-compose.yml 
b/docker-compose.yml index 8944f63..935c2bc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,13 +14,6 @@ services: image: cisagov/vdp-scanner init: true restart: "no" - environment: - - ECHO_MESSAGE=Hello World from docker-compose! - ports: - - target: "8080" - published: "8080" - protocol: tcp - mode: host vdp-scanner-version: # Run the container to collect version information diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000..6054572 --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,2 @@ +docopt +https://github.com/cisagov/hash-http-content/archive/v0.0.1.tar.gz diff --git a/src/vdp-scanner.py b/src/vdp-scanner.py new file mode 100644 index 0000000..f222438 --- /dev/null +++ b/src/vdp-scanner.py @@ -0,0 +1,46 @@ +"""Check current federal DotGov domains for a Vulnerability Disclosure Policy. + +Usage: + vdp-scanner.py [options] + +Options: + -h, --help Show this help message. + -v, --version Show script version. + -s, --source-csv=SOURCE_CSV CSV to use as a source of domains. + -a, --agency-csv=AGENCY_CSV Filename to use for Agency results. + -d, --domain-csv=DOMAIN_CSV Filename to use for Domain results. 
+""" + +# Standard Python Libraries +import logging +from typing import Any, Dict + +# Third-Party Libraries +import docopt + + +def get_version(version_file): + """Extract a version number from the given file path.""" + with open(version_file) as vfile: + for line in vfile.read().splitlines(): + if line.startswith("__version__"): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + + raise RuntimeError("Unable to find version string.") + + +def main(): + """Scan hosts with the hash-http-content package and output results.""" + logging.basicConfig( + format="%(asctime)-15s %(levelname)s %(message)s", level=logging.INFO + ) + + __version__: str = get_version("version.txt") + args: Dict[str, Any] = docopt.docopt(__doc__, version=__version__) + + logging.info(args) + + +if __name__ == "__main__": + main() From 83d17056528c796ceb6b6b46108c34205fe045aa Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Fri, 19 Feb 2021 12:31:50 -0500 Subject: [PATCH 04/24] Add installation of serverless-chrome binary and dependencies Update the Dockerfile and docker-compose.yml to reflect this addition. Rename the scanner script to match preferred Python naming conventions. 
--- Dockerfile | 21 +++++++++++++++++++-- docker-compose.yml | 6 ++++-- src/{vdp-scanner.py => vdp_scanner.py} | 0 3 files changed, 23 insertions(+), 4 deletions(-) rename src/{vdp-scanner.py => vdp_scanner.py} (100%) diff --git a/Dockerfile b/Dockerfile index 70ed453..bd1d5d0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,13 +22,27 @@ RUN python -m pip install --no-cache-dir --requirement requirements.txt FROM python:${PY_VERSION}-slim AS build-stage +ARG SERVERLESS_CHROME_VERSION="v1.0.0-57" +ARG SERVERLESS_CHROME_LOCAL="/usr/local/bin/serverless-chrome" + RUN apt-get update \ && apt-get install -y --no-install-recommends \ ca-certificates=20200601~deb10u2 \ + chromium-common=88.0.4324.146-1~deb10u1 \ + curl=7.64.0-4+deb10u1 \ + libnss3=2:3.42.1-1+deb10u3 \ openssl=1.1.1d-0+deb10u4 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* +# Download the specified serverless chrome release and install it for use +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +# Follow redirects and output as the specified file name +RUN curl -L \ + https://github.com/adieuadieu/serverless-chrome/releases/download/${SERVERLESS_CHROME_VERSION}/stable-headless-chromium-amazonlinux-2.zip \ + | gunzip --stdout - > ${SERVERLESS_CHROME_LOCAL} +RUN chmod 755 ${SERVERLESS_CHROME_LOCAL} + ENV PY_VENV=/opt/venv COPY --from=compile-stage ${PY_VENV} ${PY_VENV} ENV PATH="${PY_VENV}/bin:$PATH" @@ -42,7 +56,10 @@ RUN addgroup --system --gid ${CISA_UID} cisa \ WORKDIR ${CISA_HOME} USER cisa +RUN mkdir host_mount + COPY src/version.txt version.txt -COPY src/vdp-scanner.py vdp-scanner.py +COPY src/vdp_scanner.py vdp_scanner.py -ENTRYPOINT ["python", "vdp-scanner.py"] +ENTRYPOINT ["python", "vdp_scanner.py"] +CMD ["github"] diff --git a/docker-compose.yml b/docker-compose.yml index 935c2bc..cb0d254 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,8 +7,10 @@ services: vdp-scanner: # Run the container normally build: - # VERSION must be specified on the command line: - # e.g., --build-arg 
VERSION=0.0.1 + # SERVERLESS_CHROME_VERSION and SERVERLESS_CHROME_LOCAL can be specified + # on the command line to modify what is installed and where: + # --build-arg SERVERLESS_CHROME_VERSION=v1.0.0-56 + # --build-arg SERVERLESS_CHROME_LOCAL=/opt/serverless-chrome context: . dockerfile: Dockerfile image: cisagov/vdp-scanner diff --git a/src/vdp-scanner.py b/src/vdp_scanner.py similarity index 100% rename from src/vdp-scanner.py rename to src/vdp_scanner.py From 85c2855a6e8da73ca544ebc7c468910cf7096f5e Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Fri, 19 Feb 2021 12:56:46 -0500 Subject: [PATCH 05/24] Implement vdp_scanner.py functionality Implement the functionality to scan from a CSV of domains and output the Agency and Domain level result CSVs. The README and compose file have been updated to reflect these changes. --- .isort.cfg | 2 + README.md | 30 ++++- docker-compose.yml | 3 + src/requirements.txt | 2 + src/vdp_scanner.py | 258 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 284 insertions(+), 11 deletions(-) diff --git a/.isort.cfg b/.isort.cfg index 46d45f3..20544c1 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -6,5 +6,7 @@ import_heading_stdlib=Standard Python Libraries import_heading_thirdparty=Third-Party Libraries import_heading_firstparty=cisagov Libraries +known_first_party=hash_http_content + # Run isort under the black profile to align with our other Python linting profile=black diff --git a/README.md b/README.md index 9e2840f..02c6b6c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# vdp-scanner-docker 💀🐳 # +# vdp-scanner-docker 🔍📄 # [![GitHub Build Status](https://github.com/cisagov/vdp-scanner-docker/workflows/build/badge.svg)](https://github.com/cisagov/vdp-scanner-docker/actions) [![Total alerts](https://img.shields.io/lgtm/alerts/g/cisagov/vdp-scanner-docker.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/vdp-scanner-docker/alerts/) @@ -23,17 +23,35 @@ 
appropriate for docker containers and the major languages that we use. Pull `cisagov/vdp-scanner` from the Docker repository: - docker pull cisagov/vdp-scanner +```console +docker pull cisagov/vdp-scanner +``` Or build `cisagov/vdp-scanner` from source: - git clone https://github.com/cisagov/vdp-scanner-docker.git - cd vdp-scanner-docker - docker-compose build --build-arg VERSION=0.0.1 +```console +git clone https://github.com/cisagov/vdp-scanner-docker.git +cd vdp-scanner-docker +docker-compose build +``` ### Run ### - docker-compose run --rm vdp-scanner +This Docker image needs a mount to get the output from the script to the host. +Due to how Docker works, we need to specify the local User and Group IDs to +prevent errors or access conflicts with the generated files. + +Using `docker run` + +```console +docker-compose run --user $(id -u):$(id -g) --volume ".:/home/cisa/host_mount" --rm vdp-scanner +``` + +or using `docker-compose` + +```console +UID=$(id -u) GID=$(id -g) docker-compose up +``` ## Contributing ## diff --git a/docker-compose.yml b/docker-compose.yml index cb0d254..af542db 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,9 @@ services: image: cisagov/vdp-scanner init: true restart: "no" + user: "${UID}:${GID}" + volumes: + - .:/home/cisa/host_mount vdp-scanner-version: # Run the container to collect version information diff --git a/src/requirements.txt b/src/requirements.txt index 6054572..50de5f4 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,2 +1,4 @@ docopt https://github.com/cisagov/hash-http-content/archive/v0.0.1.tar.gz +requests +urllib3 diff --git a/src/vdp_scanner.py b/src/vdp_scanner.py index f222438..e737f7e 100644 --- a/src/vdp_scanner.py +++ b/src/vdp_scanner.py @@ -1,25 +1,215 @@ """Check current federal DotGov domains for a Vulnerability Disclosure Policy. 
Usage: - vdp-scanner.py [options] + vdp_scanner.py [options] local FILE + vdp_scanner.py [options] github + +Arguments: + FILE The local CSV file to use. Options: -h, --help Show this help message. -v, --version Show script version. - -s, --source-csv=SOURCE_CSV CSV to use as a source of domains. -a, --agency-csv=AGENCY_CSV Filename to use for Agency results. -d, --domain-csv=DOMAIN_CSV Filename to use for Domain results. + -p, --path-to-chrome=PATH Path to the serverless-chrome binary being used + [default: /usr/local/bin/serverless-chrome] """ # Standard Python Libraries +from collections import defaultdict +import csv +from datetime import datetime import logging -from typing import Any, Dict +from os.path import join as path_join +from typing import Any, Dict, List, NamedTuple, Optional, Tuple +from urllib.parse import urlparse, urlunparse # Third-Party Libraries import docopt +import requests +import urllib3 + +# cisagov Libraries +from hash_http_content import UrlHasher, UrlResult + + +class DomainResult(NamedTuple): + """Structured format for a domain check result.""" + + domain: str + agency: str + organization: str + security_contact: str + visited_url: str + is_redirect: bool + vdp_present: bool + + +class VdpScanner: + """Class to handle scanning and outputting the results of any scans.""" + + agency_csv_header = [ + "Agency", + "Total Domains", + "Domains with Security Contact Listed", + "Domains with Organization Listed", + "Domains with Matching Organization and Agency", + "Domains with Published VDP", + ] + + domain_csv_header = [ + "Domain", + "Agency", + "Organization", + "Security Contact Email", + "Visited URL", + "Was it Redirected", + "VDP is Published", + ] + + def __init__(self, hasher: UrlHasher): + """Initialize variables and perform setup.""" + self._hasher = hasher + file_date = datetime.utcnow().strftime("%Y-%m-%d") + self.agency_csv = f"agency_results_{file_date}.csv" + self.domain_csv = f"domain_results_{file_date}.csv" + 
self.output_directory = "host_mount" + + self.agency_results: defaultdict = defaultdict( + lambda: {k: 0 for k in self.agency_csv_header[1:]} + ) + + self.domain_results: List[Dict[str, Any]] = [] + + @staticmethod + def _log_vdp_failure(domain: str, err: Exception) -> None: + """Log failure information during check_for_vdp() execution.""" + logging.warning("Unable to retrieve hash for '%s'", domain) + logging.debug("Caught %s", type(err).__name__) + logging.debug(err) + + def check_for_vdp(self, domain: str) -> Tuple[str, bool, bool]: + """Check for a VDP at the given domain and return the relavent information.""" + url = urlparse(f"https://{domain}/vulnerability-disclosure-policy") + result: Optional[UrlResult] = None + + # Try with HTTPS first + try: + result = self._hasher.hash_url(urlunparse(url)) + # If there is a TLS issue, try running it without verifying + except requests.exceptions.SSLError: + logging.warning("Trying '%s' without TLS verification", domain) + try: + # Fallback to unverified TLS + result = self._hasher.hash_url(urlunparse(url), verify=False) + # If this also fails, fallback to HTTP + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + logging.warning("Falling back to HTTP for '%s'", domain) + # Try connecting to the HTTP endpoint instead + try: + result = self._hasher.hash_url( + urlunparse(url._replace(scheme="http")) + ) + # If we're unable to successfully retrieve the URL for some reason + except Exception as err: + self._log_vdp_failure(domain, err) + # Fallback to HTTP in case there is no HTTPS for the given domain + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + logging.warning("Falling back to HTTP for '%s'", domain) + # Try connecting to the HTTP endpoint instead + try: + result = self._hasher.hash_url(urlunparse(url._replace(scheme="http"))) + # If we're unable to successfully retrieve the URL for some reason + except Exception as err: + self._log_vdp_failure(domain, err) + 
except Exception as err: + self._log_vdp_failure(domain, err) + + if not result: + return ("", False, False) + + if result.status == 200: + return (result.visited_url, result.is_redirect, True) + + return (result.visited_url, result.is_redirect, False) + + def process_domain(self, domain_info: Dict[str, Any]) -> None: + """Process a domain entry from the DotGov CSV.""" + # These are direct copies from current-federal.csv + vdp_result = self.check_for_vdp(domain_info["Domain Name"]) + + self.add_domain_result( + DomainResult( + domain_info["Domain Name"], + domain_info["Agency"], + domain_info["Organization"], + domain_info["Security Contact Email"], + *vdp_result, + ) + ) + + def add_domain_result(self, result: DomainResult) -> None: + """Process the provided results for a domain.""" + result_dict = { + "Domain": result.domain, + "Agency": result.agency, + "Organization": result.organization, + "Security Contact Email": result.security_contact, + "Visited URL": result.visited_url, + "Was it Redirected": result.is_redirect, + "VDP is Published": result.vdp_present, + } + self.domain_results.append(result_dict) + + self.agency_results[result.agency]["Total Domains"] += 1 + if result.security_contact and result.security_contact != "(blank)": + self.agency_results[result.agency][ + "Domains with Security Contact Listed" + ] += 1 -def get_version(version_file): + if result.organization: + self.agency_results[result.agency]["Domains with Organization Listed"] += 1 + + if result.agency == result.organization: + self.agency_results[result.agency][ + "Domains with Matching Organization and Agency" + ] += 1 + + if result.vdp_present: + self.agency_results[result.agency]["Domains with Published VDP"] += 1 + + def output_agency_csv(self) -> None: + """Output the agency results to a CSV.""" + file = path_join(self.output_directory, self.agency_csv) + with open(file, "w") as csv_out: + agency_output = csv.DictWriter( + csv_out, fieldnames=VdpScanner.agency_csv_header + ) + 
agency_output.writeheader() + for agency, info in self.agency_results.items(): + output_dict = {"Agency": agency, **info} + agency_output.writerow(output_dict) + + def output_domain_csv(self) -> None: + """Output the agency results to a CSV.""" + file = path_join(self.output_directory, self.domain_csv) + with open(file, "w") as csv_out: + domain_output = csv.DictWriter( + csv_out, fieldnames=VdpScanner.domain_csv_header + ) + domain_output.writeheader() + for result in self.domain_results: + domain_output.writerow(result) + + def output_all_csvs(self) -> None: + """Output all CSVs.""" + self.output_agency_csv() + self.output_domain_csv() + + +def get_version(version_file) -> str: """Extract a version number from the given file path.""" with open(version_file) as vfile: for line in vfile.read().splitlines(): @@ -30,6 +220,26 @@ def get_version(version_file): raise RuntimeError("Unable to find version string.") +def get_local_csv(file: str) -> List[Dict[str, str]]: + """Load domains from a local CSV file.""" + with open(file) as csv_file: + csv_lines = [line.rstrip() for line in csv_file.readlines()] + + return list(csv.DictReader(csv_lines)) + + +def get_remote_csv() -> List[Dict[str, str]]: + """Load domains from the CSV at the given URL.""" + resp = requests.get( + "https://raw.githubusercontent.com/GSA/data/master/dotgov-domains/current-federal.csv" + ) + if resp.status_code != 200: + return [] + csv_lines = [str(line, resp.encoding) for line in resp.iter_lines()] + + return list(csv.DictReader(csv_lines)) + + def main(): """Scan hosts with the hash-http-content package and output results.""" logging.basicConfig( @@ -39,7 +249,45 @@ def main(): __version__: str = get_version("version.txt") args: Dict[str, Any] = docopt.docopt(__doc__, version=__version__) - logging.info(args) + # If we make a call to UrlHasher.hash_url() with verify=False, it will output + # a warning. Since this is a fallback mechanism, we can squelch these warnings. 
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + browser_opts = { + "args": [ + "--no-sandbox", + "--disable-gpu", + "--disable-dev-shm-usage", + "--no-zygote", + ], + "executablePath": args["--path-to-chrome"], + } + http_hasher = UrlHasher("sha256", browser_options=browser_opts) + + scanner: VdpScanner = VdpScanner(http_hasher) + if args["--agency-csv"]: + scanner.agency_csv = args["--agency-csv"] + if args["--domain-csv"]: + scanner.domain_csv = args["--domain-csv"] + + current_federal: List[Dict[str, str]] + + if args["local"]: + current_federal = get_local_csv(path_join("host_mount", args["FILE"])) + + if args["github"]: + current_federal = get_remote_csv() + + total_domains = len(current_federal) + for i, domain_info in enumerate( + sorted(current_federal, key=lambda d: d["Domain Name"]), start=1 + ): + logging.info( + "Processing '%s' (%d/%d)...", domain_info["Domain Name"], i, total_domains + ) + scanner.process_domain(domain_info) + + scanner.output_all_csvs() if __name__ == "__main__": From d7599cdd6ffde9027fe5c2bc2c316adb7731d9e8 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Sun, 21 Feb 2021 20:42:20 -0500 Subject: [PATCH 06/24] Use pipenv for script requirements This switches to using pipenv to manage the requirements for the vdp_scanner.py script. This will help ensure deterministic Docker builds by installing specific package versions, but stores them in a maintainable manner. There was also a move back to using a root user because of issues with output to the host machine and running under a different UID inside the Docker container. A debug option was added to the script and the version of chromium-common was updated to resolve a build issue. 
--- Dockerfile | 29 +++--- README.md | 12 +-- docker-compose.yml | 2 +- src/Pipfile | 18 ++++ src/Pipfile.lock | 214 +++++++++++++++++++++++++++++++++++++++++++++ src/vdp_scanner.py | 12 +-- 6 files changed, 262 insertions(+), 25 deletions(-) create mode 100644 src/Pipfile create mode 100644 src/Pipfile.lock diff --git a/Dockerfile b/Dockerfile index bd1d5d0..d41e5dc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,17 +8,26 @@ FROM python:${PY_VERSION}-slim AS compile-stage LABEL org.opencontainers.image.authors="nicholas.mcdonnell@cisa.dhs.gov" LABEL org.opencontainers.image.vendor="Cyber and Infrastructure Security Agency" -ENV PY_VENV=/opt/venv +ENV PY_VENV=/.venv +# Manually set up the virtual environment RUN python -m venv --system-site-packages ${PY_VENV} ENV PATH="${PY_VENV}/bin:$PATH" + +# Install core Python dependencies RUN python -m pip install --no-cache-dir \ pip==21.0.1 \ + pipenv==2020.11.15 \ setuptools==53.0.0 \ wheel==0.36.2 -COPY src/requirements.txt requirements.txt -RUN python -m pip install --no-cache-dir --requirement requirements.txt +# Install vdp_scanner.py requirements +COPY src/Pipfile Pipfile +COPY src/Pipfile.lock Pipfile.lock +# PIPENV_VENV_IN_PROJECT=1 directs pipenv to use the current directory for venvs +RUN PIPENV_VENV_IN_PROJECT=1 pipenv sync + +RUN python -m pip uninstall --yes pipenv FROM python:${PY_VERSION}-slim AS build-stage @@ -28,7 +37,7 @@ ARG SERVERLESS_CHROME_LOCAL="/usr/local/bin/serverless-chrome" RUN apt-get update \ && apt-get install -y --no-install-recommends \ ca-certificates=20200601~deb10u2 \ - chromium-common=88.0.4324.146-1~deb10u1 \ + chromium-common=88.0.4324.182-1~deb10u1 \ curl=7.64.0-4+deb10u1 \ libnss3=2:3.42.1-1+deb10u3 \ openssl=1.1.1d-0+deb10u4 \ @@ -43,19 +52,13 @@ RUN curl -L \ | gunzip --stdout - > ${SERVERLESS_CHROME_LOCAL} RUN chmod 755 ${SERVERLESS_CHROME_LOCAL} -ENV PY_VENV=/opt/venv +ENV PY_VENV=/.venv COPY --from=compile-stage ${PY_VENV} ${PY_VENV} ENV PATH="${PY_VENV}/bin:$PATH" -ARG 
CISA_UID=421 -ENV CISA_HOME="/home/cisa" - -RUN addgroup --system --gid ${CISA_UID} cisa \ - && adduser --system --uid ${CISA_UID} --ingroup cisa cisa - -WORKDIR ${CISA_HOME} -USER cisa +ENV TASK_HOME="/task" +WORKDIR ${TASK_HOME} RUN mkdir host_mount COPY src/version.txt version.txt diff --git a/README.md b/README.md index 02c6b6c..23cb6a1 100644 --- a/README.md +++ b/README.md @@ -37,20 +37,20 @@ docker-compose build ### Run ### -This Docker image needs a mount to get the output from the script to the host. -Due to how Docker works, we need to specify the local User and Group IDs to -prevent errors or access conflicts with the generated files. +This Docker image needs a bind mount to get the output from the script to the +host. Using `docker run` ```console -docker-compose run --user $(id -u):$(id -g) --volume ".:/home/cisa/host_mount" --rm vdp-scanner +docker run --mount type=bind,source=$(pwd),target=/task/host_mount --rm cisagov/vdp-scanner ``` -or using `docker-compose` +or if you have cloned the repository, you can use the included +`docker-compose.yml` ```console -UID=$(id -u) GID=$(id -g) docker-compose up +docker-compose up ``` ## Contributing ## diff --git a/docker-compose.yml b/docker-compose.yml index af542db..0d8f746 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,7 +18,7 @@ services: restart: "no" user: "${UID}:${GID}" volumes: - - .:/home/cisa/host_mount + - .:/task/host_mount vdp-scanner-version: # Run the container to collect version information diff --git a/src/Pipfile b/src/Pipfile new file mode 100644 index 0000000..c95097f --- /dev/null +++ b/src/Pipfile @@ -0,0 +1,18 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +docopt = "*" +hash-http-content = {file = "https://github.com/cisagov/hash-http-content/archive/v0.0.1.tar.gz"} +requests = "*" +urllib3 = "*" +pip = "*" +setuptools = "*" +wheel = "*" + +[dev-packages] + +[requires] +python_version = "3" diff --git a/src/Pipfile.lock 
b/src/Pipfile.lock new file mode 100644 index 0000000..0163248 --- /dev/null +++ b/src/Pipfile.lock @@ -0,0 +1,214 @@ +{ + "_meta": { + "hash": { + "sha256": "1db2ac6669815a5e98c2e5e8fad55886f578b2e3b3240f1b56c5e04d95fcfecf" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "appdirs": { + "hashes": [ + "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", + "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" + ], + "version": "==1.4.4" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35", + "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25", + "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666" + ], + "version": "==4.9.3" + }, + "certifi": { + "hashes": [ + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" + ], + "version": "==2020.12.5" + }, + "chardet": { + "hashes": [ + "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", + "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==4.0.0" + }, + "contextlib2": { + "hashes": [ + "sha256:01f490098c18b19d2bd5bb5dc445b2054d2fa97f09a4280ba2c5f3c394c8162e", + "sha256:3355078a159fbb44ee60ea80abd0d87b80b78c248643b49aa6d94673b413609b" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.6.0.post1" + }, + "docopt": { + "hashes": [ + "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" + ], + "index": "pypi", + "version": "==0.6.2" + }, + "hash-http-content": { + "file": 
"https://github.com/cisagov/hash-http-content/archive/v0.0.1.tar.gz", + "hashes": [ + "sha256:388abc35517970eba40985df3283af85695aa77810e2a29fe94900484ff8a5b6" + ], + "version": "==0.0.1" + }, + "idna": { + "hashes": [ + "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", + "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.10" + }, + "lxml": { + "hashes": [ + "sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d", + "sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37", + "sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01", + "sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2", + "sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644", + "sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75", + "sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80", + "sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2", + "sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780", + "sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98", + "sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308", + "sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf", + "sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388", + "sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d", + "sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3", + "sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8", + "sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af", + "sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2", + "sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e", + 
"sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939", + "sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03", + "sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d", + "sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a", + "sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5", + "sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a", + "sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711", + "sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf", + "sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089", + "sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505", + "sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b", + "sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f", + "sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc", + "sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e", + "sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931", + "sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc", + "sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe", + "sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==4.6.2" + }, + "pyee": { + "hashes": [ + "sha256:383973b63ad7ed5e3c0311f8b179c52981f9e7b3eaea0e9a830d13ec34dde65f", + "sha256:92dacc5bd2bdb8f95aa8dd2585d47ca1c4840e2adb95ccf90034d64f725bfd31" + ], + "version": "==8.1.0" + }, + "pyppeteer": { + "hashes": [ + "sha256:c2974be1afa13b17f7ecd120d265d8b8cd324d536a231c3953ca872b68aba4af", + "sha256:d4cb4a5ef94b00c1073aed888b39646ce26cff3339cff7a3f1f1cc307bf50408" + ], + "markers": "python_full_version >= '3.6.1' and python_full_version < '4.0.0'", + 
"version": "==0.2.5" + }, + "requests": { + "hashes": [ + "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", + "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" + ], + "index": "pypi", + "version": "==2.25.1" + }, + "schema": { + "hashes": [ + "sha256:cf97e4cd27e203ab6bb35968532de1ed8991bce542a646f0ff1d643629a4945d", + "sha256:fbb6a52eb2d9facf292f233adcc6008cffd94343c63ccac9a1cb1f3e6de1db17" + ], + "version": "==0.7.4" + }, + "soupsieve": { + "hashes": [ + "sha256:407fa1e8eb3458d1b5614df51d9651a1180ea5fedf07feb46e45d7e25e6d6cdd", + "sha256:d3a5ea5b350423f47d07639f74475afedad48cf41c0ad7a82ca13a3928af34f6" + ], + "markers": "python_version >= '3.0'", + "version": "==2.2" + }, + "tqdm": { + "hashes": [ + "sha256:65185676e9fdf20d154cffd1c5de8e39ef9696ff7e59fe0156b1b08e468736af", + "sha256:70657337ec104eb4f3fb229285358f23f045433f6aea26846cdd55f0fd68945c" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==4.57.0" + }, + "urllib3": { + "hashes": [ + "sha256:1b465e494e3e0d8939b50680403e3aedaa2bc434b7d5af64dfd3c958d7f5ae80", + "sha256:de3eedaad74a2683334e282005cd8d7f22f4d55fa690a2a1020a416cb0a47e73" + ], + "index": "pypi", + "version": "==1.26.3" + }, + "websockets": { + "hashes": [ + "sha256:0e4fb4de42701340bd2353bb2eee45314651caa6ccee80dbd5f5d5978888fed5", + "sha256:1d3f1bf059d04a4e0eb4985a887d49195e15ebabc42364f4eb564b1d065793f5", + "sha256:20891f0dddade307ffddf593c733a3fdb6b83e6f9eef85908113e628fa5a8308", + "sha256:295359a2cc78736737dd88c343cd0747546b2174b5e1adc223824bcaf3e164cb", + "sha256:2db62a9142e88535038a6bcfea70ef9447696ea77891aebb730a333a51ed559a", + "sha256:3762791ab8b38948f0c4d281c8b2ddfa99b7e510e46bd8dfa942a5fff621068c", + "sha256:3db87421956f1b0779a7564915875ba774295cc86e81bc671631379371af1170", + "sha256:3ef56fcc7b1ff90de46ccd5a687bbd13a3180132268c4254fc0fa44ecf4fc422", + "sha256:4f9f7d28ce1d8f1295717c2c25b732c2bc0645db3215cf757551c392177d7cb8", + 
"sha256:5c01fd846263a75bc8a2b9542606927cfad57e7282965d96b93c387622487485", + "sha256:5c65d2da8c6bce0fca2528f69f44b2f977e06954c8512a952222cea50dad430f", + "sha256:751a556205d8245ff94aeef23546a1113b1dd4f6e4d102ded66c39b99c2ce6c8", + "sha256:7ff46d441db78241f4c6c27b3868c9ae71473fe03341340d2dfdbe8d79310acc", + "sha256:965889d9f0e2a75edd81a07592d0ced54daa5b0785f57dc429c378edbcffe779", + "sha256:9b248ba3dd8a03b1a10b19efe7d4f7fa41d158fdaa95e2cf65af5a7b95a4f989", + "sha256:9bef37ee224e104a413f0780e29adb3e514a5b698aabe0d969a6ba426b8435d1", + "sha256:c1ec8db4fac31850286b7cd3b9c0e1b944204668b8eb721674916d4e28744092", + "sha256:c8a116feafdb1f84607cb3b14aa1418424ae71fee131642fc568d21423b51824", + "sha256:ce85b06a10fc65e6143518b96d3dca27b081a740bae261c2fb20375801a9d56d", + "sha256:d705f8aeecdf3262379644e4b55107a3b55860eb812b673b28d0fbc347a60c55", + "sha256:e898a0863421650f0bebac8ba40840fc02258ef4714cb7e1fd76b6a6354bda36", + "sha256:f8a7bff6e8664afc4e6c28b983845c5bc14965030e3fb98789734d416af77c4b" + ], + "markers": "python_full_version >= '3.6.1'", + "version": "==8.1" + }, + "wheel": { + "hashes": [ + "sha256:78b5b185f0e5763c26ca1e324373aadd49182ca90e825f7853f4b2509215dc0e", + "sha256:e11eefd162658ea59a60a0f6c7d493a7190ea4b9a85e335b33489d9f17e0245e" + ], + "index": "pypi", + "version": "==0.36.2" + } + }, + "develop": {} +} diff --git a/src/vdp_scanner.py b/src/vdp_scanner.py index e737f7e..501642d 100644 --- a/src/vdp_scanner.py +++ b/src/vdp_scanner.py @@ -10,8 +10,9 @@ Options: -h, --help Show this help message. -v, --version Show script version. + -d, --debug Enable debugging output. -a, --agency-csv=AGENCY_CSV Filename to use for Agency results. - -d, --domain-csv=DOMAIN_CSV Filename to use for Domain results. + -t, --domain-csv=DOMAIN_CSV Filename to use for Domain (TLD) results. 
-p, --path-to-chrome=PATH Path to the serverless-chrome binary being used [default: /usr/local/bin/serverless-chrome] """ @@ -242,13 +243,14 @@ def get_remote_csv() -> List[Dict[str, str]]: def main(): """Scan hosts with the hash-http-content package and output results.""" - logging.basicConfig( - format="%(asctime)-15s %(levelname)s %(message)s", level=logging.INFO - ) - __version__: str = get_version("version.txt") args: Dict[str, Any] = docopt.docopt(__doc__, version=__version__) + log_level = logging.DEBUG if args["--debug"] else logging.INFO + logging.basicConfig( + format="%(asctime)-15s %(levelname)s %(message)s", level=log_level + ) + # If we make a call to UrlHasher.hash_url() with verify=False, it will output # a warning. Since this is a fallback mechanism, we can squelch these warnings. urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) From 2aa49e44978d33c8e905adb0b147267ca7582f0e Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Sun, 21 Feb 2021 22:47:39 -0500 Subject: [PATCH 07/24] Pare testing down to version checks The nature of this Docker project does not lend itself well to runtime testing, so for the time being I have pared testing down to version checks only. --- tests/container_test.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/tests/container_test.py b/tests/container_test.py index fa214fe..07d062a 100644 --- a/tests/container_test.py +++ b/tests/container_test.py @@ -3,17 +3,10 @@ # Standard Python Libraries import os -import time # Third-Party Libraries import pytest -ENV_VAR = "ECHO_MESSAGE" -ENV_VAR_VAL = "Hello World from docker-compose!" -READY_MESSAGE = "This is a debug message" -SECRET_QUOTE = ( - "There are no secrets better kept than the secrets everybody guesses." 
# nosec -) RELEASE_TAG = os.getenv("RELEASE_TAG") VERSION_FILE = "src/version.txt" @@ -26,35 +19,6 @@ def test_container_count(dockerc): ), "Wrong number of containers were started." -def test_wait_for_ready(main_container): - """Wait for container to be ready.""" - TIMEOUT = 10 - for i in range(TIMEOUT): - if READY_MESSAGE in main_container.logs().decode("utf-8"): - break - time.sleep(1) - else: - raise Exception( - f"Container does not seem ready. " - f'Expected "{READY_MESSAGE}" in the log within {TIMEOUT} seconds.' - ) - - -def test_wait_for_exits(main_container, version_container): - """Wait for containers to exit.""" - assert main_container.wait() == 0, "Container service (main) did not exit cleanly" - assert ( - version_container.wait() == 0 - ), "Container service (version) did not exit cleanly" - - -def test_output(main_container): - """Verify the container had the correct output.""" - main_container.wait() # make sure container exited if running test isolated - log_output = main_container.logs().decode("utf-8") - assert SECRET_QUOTE in log_output, "Secret not found in log output." 
- - @pytest.mark.skipif( RELEASE_TAG in [None, ""], reason="this is not a release (RELEASE_TAG not set)" ) From 040b4302ff5cf60c740a74324ad623bfc4c86cf1 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Tue, 23 Feb 2021 17:16:09 -0500 Subject: [PATCH 08/24] Update README blurb describing this project --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 23cb6a1..97fc32b 100644 --- a/README.md +++ b/README.md @@ -10,12 +10,11 @@ [![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/cisagov/vdp-scanner)](https://hub.docker.com/r/cisagov/vdp-scanner) [![Platforms](https://img.shields.io/badge/platforms-amd64%20%7C%20arm%2Fv6%20%7C%20arm%2Fv7%20%7C%20arm64%20%7C%20ppc64le%20%7C%20s390x-blue)](https://hub.docker.com/r/cisagov/vdp-scanner/tags) -This is a docker skeleton project that can be used to quickly get a -new [cisagov](https://github.com/cisagov) GitHub docker project -started. This skeleton project contains [licensing -information](LICENSE), as well as [pre-commit hooks](https://pre-commit.com) -and [GitHub Actions](https://github.com/features/actions) configurations -appropriate for docker containers and the major languages that we use. +This is a Docker project to scan either the +[GSA current Federal .gov domain list](https://github.com/GSA/data/blob/master/dotgov-domains/current-federal.csv) +, or a given CSV in the same format, with the +[cisagov/hash-http-content](https://github.com/cisagov/hash-http-content) +Python library. Then it will output CSVs with Agency and domain level results. 
## Usage ## From ded105f67ef8b53ba088b4f58252825edf7e19e6 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Tue, 23 Feb 2021 17:18:52 -0500 Subject: [PATCH 09/24] Add comment explaining removal of pipenv --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index d41e5dc..6868bec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,8 @@ COPY src/Pipfile.lock Pipfile.lock # PIPENV_VENV_IN_PROJECT=1 directs pipenv to use the current directory for venvs RUN PIPENV_VENV_IN_PROJECT=1 pipenv sync +# We only need pipenv to set up the environment, so we remove it from the venv +# as a last step. RUN python -m pip uninstall --yes pipenv FROM python:${PY_VERSION}-slim AS build-stage From 5fb6b641820b386c805c9e136c8ec64d88e56ce5 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Tue, 23 Feb 2021 17:19:44 -0500 Subject: [PATCH 10/24] Remove remnant from testing in docker-compose.yml I did testing to use a supplied UID/GID to work around file ownership on output CSVs, but I had issues with permissions inside the image and scrapped that approach. I forgot a piece of that in the docker-compose.yml, so it needs to be removed. --- docker-compose.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 0d8f746..a873463 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,7 +16,6 @@ services: image: cisagov/vdp-scanner init: true restart: "no" - user: "${UID}:${GID}" volumes: - .:/task/host_mount From f7f818ff53a050100ecebac71b67ba15adae45cd Mon Sep 17 00:00:00 2001 From: Nick M <50747025+mcdonnnj@users.noreply.github.com> Date: Tue, 23 Feb 2021 17:26:55 -0500 Subject: [PATCH 11/24] Changes to scanner script from review Define an acronym as part of the script's `__doc__` and change a logging output to be consistent with other logging. 
Co-authored-by: dav3r --- src/vdp_scanner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vdp_scanner.py b/src/vdp_scanner.py index 501642d..ae8dfe9 100644 --- a/src/vdp_scanner.py +++ b/src/vdp_scanner.py @@ -1,4 +1,4 @@ -"""Check current federal DotGov domains for a Vulnerability Disclosure Policy. +"""Check current federal DotGov domains for a Vulnerability Disclosure Policy (VDP). Usage: vdp_scanner.py [options] local FILE @@ -100,7 +100,7 @@ def check_for_vdp(self, domain: str) -> Tuple[str, bool, bool]: result = self._hasher.hash_url(urlunparse(url)) # If there is a TLS issue, try running it without verifying except requests.exceptions.SSLError: - logging.warning("Trying '%s' without TLS verification", domain) + logging.warning("Falling back to HTTPS without TLS verification for '%s'", domain) try: # Fallback to unverified TLS result = self._hasher.hash_url(urlunparse(url), verify=False) From a739beddf6231c1ed86de3a502d1987411b4c789 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Tue, 23 Feb 2021 17:32:45 -0500 Subject: [PATCH 12/24] Make the value for a missing security contact a class variable This gives a useful name to refer to the value and allows it to be easily changed without worrying about updating any references. --- src/vdp_scanner.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/vdp_scanner.py b/src/vdp_scanner.py index ae8dfe9..3f1b066 100644 --- a/src/vdp_scanner.py +++ b/src/vdp_scanner.py @@ -50,6 +50,10 @@ class DomainResult(NamedTuple): class VdpScanner: """Class to handle scanning and outputting the results of any scans.""" + # Value that represents a missing security contact in a GSA formatted domain + # list CSV. 
+ MISSING_SECURITY_CONTACT = "(blank)" + agency_csv_header = [ "Agency", "Total Domains", @@ -165,7 +169,10 @@ def add_domain_result(self, result: DomainResult) -> None: self.agency_results[result.agency]["Total Domains"] += 1 - if result.security_contact and result.security_contact != "(blank)": + if ( + result.security_contact + and result.security_contact != self.MISSING_SECURITY_CONTACT + ): self.agency_results[result.agency][ "Domains with Security Contact Listed" ] += 1 From 88ec334f7296d4491d31b5708f3c3f01de8a861b Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Tue, 23 Feb 2021 17:35:56 -0500 Subject: [PATCH 13/24] Add comments explaining two more class variables Add comments that explain what the agency_csv_header and domain_csv_header variables are used for in the scanner script's VdpScanner class. --- src/vdp_scanner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vdp_scanner.py b/src/vdp_scanner.py index 3f1b066..8903598 100644 --- a/src/vdp_scanner.py +++ b/src/vdp_scanner.py @@ -54,6 +54,7 @@ class VdpScanner: # list CSV. MISSING_SECURITY_CONTACT = "(blank)" + # Header for the Agency level results CSV. agency_csv_header = [ "Agency", "Total Domains", @@ -63,6 +64,7 @@ class VdpScanner: "Domains with Published VDP", ] + # Header for the domain level results CSV. domain_csv_header = [ "Domain", "Agency", From 1388d68b55e8ce285a30996d923e677d0bfe935b Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Tue, 23 Feb 2021 17:37:55 -0500 Subject: [PATCH 14/24] Change to more descriptive variable name The `current_federal` variable name is not particularly helpful with the current functionality of the scanner script. I have updated it accordingly. 
--- src/vdp_scanner.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/vdp_scanner.py b/src/vdp_scanner.py index 8903598..d48c5d8 100644 --- a/src/vdp_scanner.py +++ b/src/vdp_scanner.py @@ -281,17 +281,17 @@ def main(): if args["--domain-csv"]: scanner.domain_csv = args["--domain-csv"] - current_federal: List[Dict[str, str]] + domains_to_scan: List[Dict[str, str]] if args["local"]: - current_federal = get_local_csv(path_join("host_mount", args["FILE"])) + domains_to_scan = get_local_csv(path_join("host_mount", args["FILE"])) if args["github"]: - current_federal = get_remote_csv() + domains_to_scan = get_remote_csv() - total_domains = len(current_federal) + total_domains = len(domains_to_scan) for i, domain_info in enumerate( - sorted(current_federal, key=lambda d: d["Domain Name"]), start=1 + sorted(domains_to_scan, key=lambda d: d["Domain Name"]), start=1 ): logging.info( "Processing '%s' (%d/%d)...", domain_info["Domain Name"], i, total_domains From 47f0953ecbb7ffe68bdf0b8186f067f99d48e1af Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Tue, 23 Feb 2021 18:00:57 -0500 Subject: [PATCH 15/24] Manually blacken vdp_scanner.py script A merged suggestion on GitHub was not getting caught during local pre-commit invocations. I manually ran black against the script to get the needed reformatting to apply. 
--- src/vdp_scanner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vdp_scanner.py b/src/vdp_scanner.py index d48c5d8..427aced 100644 --- a/src/vdp_scanner.py +++ b/src/vdp_scanner.py @@ -106,7 +106,9 @@ def check_for_vdp(self, domain: str) -> Tuple[str, bool, bool]: result = self._hasher.hash_url(urlunparse(url)) # If there is a TLS issue, try running it without verifying except requests.exceptions.SSLError: - logging.warning("Falling back to HTTPS without TLS verification for '%s'", domain) + logging.warning( + "Falling back to HTTPS without TLS verification for '%s'", domain + ) try: # Fallback to unverified TLS result = self._hasher.hash_url(urlunparse(url), verify=False) From 1ffbafac036f01f053526915e9154abe1f55ae98 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Wed, 24 Feb 2021 11:17:12 -0500 Subject: [PATCH 16/24] Change case of agency and domain Change to lowercase any uses of agency or domain that are not for CSV output use. --- README.md | 2 +- src/vdp_scanner.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 97fc32b..0486478 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ This is a Docker project to scan either the [GSA current Federal .gov domain list](https://github.com/GSA/data/blob/master/dotgov-domains/current-federal.csv) , or a given CSV in the same format, with the [cisagov/hash-http-content](https://github.com/cisagov/hash-http-content) -Python library. Then it will output CSVs with Agency and domain level results. +Python library. Then it will output CSVs with agency and domain level results. ## Usage ## diff --git a/src/vdp_scanner.py b/src/vdp_scanner.py index 427aced..85df97d 100644 --- a/src/vdp_scanner.py +++ b/src/vdp_scanner.py @@ -11,8 +11,8 @@ -h, --help Show this help message. -v, --version Show script version. -d, --debug Enable debugging output. 
- -a, --agency-csv=AGENCY_CSV Filename to use for Agency results. - -t, --domain-csv=DOMAIN_CSV Filename to use for Domain (TLD) results. + -a, --agency-csv=AGENCY_CSV Filename to use for agency results. + -t, --domain-csv=DOMAIN_CSV Filename to use for domain (TLD) results. -p, --path-to-chrome=PATH Path to the serverless-chrome binary being used [default: /usr/local/bin/serverless-chrome] """ @@ -54,7 +54,7 @@ class VdpScanner: # list CSV. MISSING_SECURITY_CONTACT = "(blank)" - # Header for the Agency level results CSV. + # Header for the agency level results CSV. agency_csv_header = [ "Agency", "Total Domains", From 025a9de46a25e66d0ce1c85dbc2f1c413fb815e6 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Wed, 24 Feb 2021 11:22:57 -0500 Subject: [PATCH 17/24] Remove unnecessary commas It was pointed out that I was overusing commas in the README. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0486478..5dfa827 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ This is a Docker project to scan either the [GSA current Federal .gov domain list](https://github.com/GSA/data/blob/master/dotgov-domains/current-federal.csv) -, or a given CSV in the same format, with the +or a given CSV in the same format with the [cisagov/hash-http-content](https://github.com/cisagov/hash-http-content) Python library. Then it will output CSVs with agency and domain level results. From 59da1a3e6cb2a8bed58781c5229473da1747cd5f Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Wed, 24 Mar 2021 13:29:55 -0400 Subject: [PATCH 18/24] Update openssl version installed and allow downgrades Update to use the latest version of the openssl package. Add option to allow apt to downgrade packages. This ensures that the specific versions we have listed are what is installed. 
--- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6868bec..d858f38 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,12 +37,12 @@ ARG SERVERLESS_CHROME_VERSION="v1.0.0-57" ARG SERVERLESS_CHROME_LOCAL="/usr/local/bin/serverless-chrome" RUN apt-get update \ - && apt-get install -y --no-install-recommends \ + && apt-get install -y --allow-downgrades --no-install-recommends \ ca-certificates=20200601~deb10u2 \ chromium-common=88.0.4324.182-1~deb10u1 \ curl=7.64.0-4+deb10u1 \ libnss3=2:3.42.1-1+deb10u3 \ - openssl=1.1.1d-0+deb10u4 \ + openssl=1.1.1d-0+deb10u5 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* From e43430557e2c4626d9f7c775ddd407b9ee836246 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:00:36 -0400 Subject: [PATCH 19/24] Remove linux/ppc64le from buildx platforms The linux/ppc64le architecture does not have the chromium-common package. Since this image relies on that package, we cannot support that architecture at this time. --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 39c6da3..d08ffcf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,7 +29,7 @@ env: IMAGE_NAME: cisagov/vdp-scanner PIP_CACHE_DIR: ~/.cache/pip PLATFORMS: "linux/amd64,linux/arm/v6,linux/arm/v7,\ - linux/arm64,linux/ppc64le,linux/s390x" + linux/arm64,linux/s390x" PRE_COMMIT_CACHE_DIR: ~/.cache/pre-commit jobs: From 16ebb0360951da80091990343d3eec7004bd6bc2 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:13:45 -0400 Subject: [PATCH 20/24] Update Dockerfile package versions Update the curl and openssl package versions to install the latest available. 
--- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index d858f38..a1dc44e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,9 +40,9 @@ RUN apt-get update \ && apt-get install -y --allow-downgrades --no-install-recommends \ ca-certificates=20200601~deb10u2 \ chromium-common=88.0.4324.182-1~deb10u1 \ - curl=7.64.0-4+deb10u1 \ + curl=7.64.0-4+deb10u2 \ libnss3=2:3.42.1-1+deb10u3 \ - openssl=1.1.1d-0+deb10u5 \ + openssl=1.1.1d-0+deb10u6 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* From 6d5ee395a61c8ddeeb47eef9ba25d44b7143df71 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:23:15 -0400 Subject: [PATCH 21/24] Remove linux/s390x from buildx platforms The linux/s390x architecture does not have the chromium-common package. Since this image relies on that package, we cannot support that architecture at this time. --- .github/workflows/build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d08ffcf..1500c3a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,8 +28,7 @@ env: BUILDX_CACHE_DIR: ~/.cache/buildx IMAGE_NAME: cisagov/vdp-scanner PIP_CACHE_DIR: ~/.cache/pip - PLATFORMS: "linux/amd64,linux/arm/v6,linux/arm/v7,\ - linux/arm64,linux/s390x" + PLATFORMS: "linux/amd64,linux/arm/v6,linux/arm/v7,linux/arm64" PRE_COMMIT_CACHE_DIR: ~/.cache/pre-commit jobs: From d8a4ada08bac9d3658ebb6c5013801b1d1101e9b Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:35:04 -0400 Subject: [PATCH 22/24] Remove linux/arm/v6 from buildx platforms The linux/arm/v6 architecture does not have the chromium-common package. Since this image relies on that package, we cannot support that architecture at this time. 
--- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1500c3a..ece30de 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,7 +28,7 @@ env: BUILDX_CACHE_DIR: ~/.cache/buildx IMAGE_NAME: cisagov/vdp-scanner PIP_CACHE_DIR: ~/.cache/pip - PLATFORMS: "linux/amd64,linux/arm/v6,linux/arm/v7,linux/arm64" + PLATFORMS: "linux/amd64,linux/arm/v7,linux/arm64" PRE_COMMIT_CACHE_DIR: ~/.cache/pre-commit jobs: From 944c513e8d2618b21119e9f108d11fab4a64362a Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Thu, 1 Apr 2021 17:15:46 -0400 Subject: [PATCH 23/24] Update dependencies in Dockerfile Add libxml2-dev and libxslt1-dev packages and switch to using the non-slim version of python Docker images in the compile stage to support successful building of the lxml Python package on all platforms. --- Dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a1dc44e..fc4c3d2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ ARG PY_VERSION=3.9 -FROM python:${PY_VERSION}-slim AS compile-stage +FROM python:${PY_VERSION} AS compile-stage # For a list of pre-defined annotation keys and value types see: # https://github.com/opencontainers/image-spec/blob/master/annotations.md @@ -8,6 +8,11 @@ FROM python:${PY_VERSION}-slim AS compile-stage LABEL org.opencontainers.image.authors="nicholas.mcdonnell@cisa.dhs.gov" LABEL org.opencontainers.image.vendor="Cyber and Infrastructure Security Agency" +RUN apt-get update \ + && apt-get install -y --allow-downgrades --no-install-recommends \ + libxml2-dev=2.9.4+dfsg1-7+deb10u1 \ + libxslt1-dev=1.1.32-2.2~deb10u1 + ENV PY_VENV=/.venv # Manually set up the virtual environment @@ -42,6 +47,8 @@ RUN apt-get update \ chromium-common=88.0.4324.182-1~deb10u1 \ curl=7.64.0-4+deb10u2 \ libnss3=2:3.42.1-1+deb10u3 \ + 
libxml2-dev=2.9.4+dfsg1-7+deb10u1 \ + libxslt1-dev=1.1.32-2.2~deb10u1 \ openssl=1.1.1d-0+deb10u6 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* From 94011c3ae81554aa0b2f3474044111896dce3e6f Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Thu, 1 Apr 2021 17:19:22 -0400 Subject: [PATCH 24/24] Update Python dependencies for the Docker image Updated by running `pipenv update` in the `src/` directory. --- src/Pipfile.lock | 95 ++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/src/Pipfile.lock b/src/Pipfile.lock index 0163248..f081d2e 100644 --- a/src/Pipfile.lock +++ b/src/Pipfile.lock @@ -78,46 +78,45 @@ }, "lxml": { "hashes": [ - "sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d", - "sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37", - "sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01", - "sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2", - "sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644", - "sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75", - "sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80", - "sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2", - "sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780", - "sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98", - "sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308", - "sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf", - "sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388", - "sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d", - "sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3", - "sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8", - 
"sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af", - "sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2", - "sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e", - "sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939", - "sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03", - "sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d", - "sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a", - "sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5", - "sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a", - "sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711", - "sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf", - "sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089", - "sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505", - "sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b", - "sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f", - "sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc", - "sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e", - "sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931", - "sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc", - "sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe", - "sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e" + "sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d", + "sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3", + "sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2", + "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f", + "sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927", 
+ "sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3", + "sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7", + "sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f", + "sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade", + "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468", + "sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b", + "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4", + "sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83", + "sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04", + "sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791", + "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51", + "sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1", + "sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a", + "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f", + "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee", + "sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec", + "sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969", + "sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28", + "sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a", + "sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa", + "sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106", + "sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d", + "sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4", + "sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0", + "sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4", + 
"sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2", + "sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0", + "sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654", + "sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2", + "sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23", + "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==4.6.2" + "version": "==4.6.3" }, "pyee": { "hashes": [ @@ -131,7 +130,7 @@ "sha256:c2974be1afa13b17f7ecd120d265d8b8cd324d536a231c3953ca872b68aba4af", "sha256:d4cb4a5ef94b00c1073aed888b39646ce26cff3339cff7a3f1f1cc307bf50408" ], - "markers": "python_full_version >= '3.6.1' and python_full_version < '4.0.0'", + "markers": "python_version < '4' and python_full_version >= '3.6.1'", "version": "==0.2.5" }, "requests": { @@ -151,27 +150,27 @@ }, "soupsieve": { "hashes": [ - "sha256:407fa1e8eb3458d1b5614df51d9651a1180ea5fedf07feb46e45d7e25e6d6cdd", - "sha256:d3a5ea5b350423f47d07639f74475afedad48cf41c0ad7a82ca13a3928af34f6" + "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc", + "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b" ], "markers": "python_version >= '3.0'", - "version": "==2.2" + "version": "==2.2.1" }, "tqdm": { "hashes": [ - "sha256:65185676e9fdf20d154cffd1c5de8e39ef9696ff7e59fe0156b1b08e468736af", - "sha256:70657337ec104eb4f3fb229285358f23f045433f6aea26846cdd55f0fd68945c" + "sha256:9fdf349068d047d4cfbe24862c425883af1db29bcddf4b0eeb2524f6fbdb23c7", + "sha256:d666ae29164da3e517fcf125e41d4fe96e5bb375cd87ff9763f6b38b5592fe33" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==4.57.0" + "version": "==4.59.0" }, "urllib3": { "hashes": [ - 
"sha256:1b465e494e3e0d8939b50680403e3aedaa2bc434b7d5af64dfd3c958d7f5ae80", - "sha256:de3eedaad74a2683334e282005cd8d7f22f4d55fa690a2a1020a416cb0a47e73" + "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df", + "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937" ], "index": "pypi", - "version": "==1.26.3" + "version": "==1.26.4" }, "websockets": { "hashes": [