diff --git a/.ansible-lint b/.ansible-lint
new file mode 100644
index 0000000..0e80b05
--- /dev/null
+++ b/.ansible-lint
@@ -0,0 +1,22 @@
+---
+# See https://ansible-lint.readthedocs.io/en/latest/configuring.html
+# for a list of the configuration elements that can exist in this
+# file.
+enable_list:
+  # Useful checks that one must opt into. See here for more details:
+  # https://ansible-lint.readthedocs.io/en/latest/rules.html
+  - fqcn-builtins
+  - no-log-password
+  - no-same-owner
+exclude_paths:
+  # This exclusion is implicit, unless exclude_paths is defined
+  - .cache
+  # Seems wise to ignore this too
+  - .github
+kinds:
+  # This will force our systemd-specific molecule configurations to be treated
+  # as plain yaml files by ansible-lint. This mirrors the default kind
+  # configuration in ansible-lint for molecule configurations:
+  # yaml: "**/molecule/*/{base,molecule}.{yaml,yml}"
+  - yaml: "**/molecule/*/molecule-{no,with}-systemd.yml"
+use_default_rules: true
diff --git a/.bandit.yml b/.bandit.yml
new file mode 100644
index 0000000..2b618f6
--- /dev/null
+++ b/.bandit.yml
@@ -0,0 +1,13 @@
+---
+# Configuration file for the Bandit Python security scanner
+# https://bandit.readthedocs.io/en/latest/config.html

+# Tests are first included by `tests`, and then excluded by `skips`.
+# If `tests` is empty, all tests are considered included.

+tests:
+# - B101
+# - B102

+skips:
+  - B101  # skip "assert used" check since assertions are required in pytests
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..92ff826
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,25 @@
+[flake8]
+max-line-length = 80
+# Select (turn on)
+# * Complexity violations reported by mccabe (C) -
+#   http://flake8.pycqa.org/en/latest/user/error-codes.html#error-violation-codes
+# * Documentation conventions compliance reported by pydocstyle (D) -
+#   http://www.pydocstyle.org/en/stable/error_codes.html
+# * Default errors and warnings reported by pycodestyle (E and W) -
+#   https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes
+# * Default errors reported by pyflakes (F) -
+#   http://flake8.pycqa.org/en/latest/glossary.html#term-pyflakes
+# * Default warnings reported by flake8-bugbear (B) -
+#   https://github.com/PyCQA/flake8-bugbear#list-of-warnings
+# * The B950 flake8-bugbear opinionated warning -
+#   https://github.com/PyCQA/flake8-bugbear#opinionated-warnings
+select = C,D,E,F,W,B,B950
+# Ignore flake8's default warning about maximum line length, which has
+# a hard stop at the configured value. Instead we use
+# flake8-bugbear's B950, which allows up to 10% overage.
+#
+# Also ignore flake8's warning about line breaks before binary
+# operators. It no longer agrees with PEP8. See, for example, here:
+# https://github.com/ambv/black/issues/21. Guido agrees here:
+# https://github.com/python/peps/commit/c59c4376ad233a62ca4b3a6060c81368bd21e85b.
+ignore = E501,W503
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 0000000..ab07ea9
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,10 @@
+# Each line is a file pattern followed by one or more owners.

+# These owners will be the default owners for everything in the
+# repo. Unless a later match takes precedence, these owners will be
+# requested for review when someone opens a pull request.
+* @dav3r @jsf9k @mcdonnnj

+# These folks own any files in the .github directory at the root of
+# the repository and any of its subdirectories.
+/.github/ @dav3r @felddy @jsf9k @mcdonnnj
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..41a26d0
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,23 @@
+---

+version: 2
+updates:
+  - package-ecosystem: "docker"
+    directory: "/"
+    schedule:
+      interval: "weekly"

+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"

+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "weekly"

+  - package-ecosystem: "terraform"
+    directory: "/"
+    schedule:
+      interval: "weekly"
diff --git a/.github/lineage.yml b/.github/lineage.yml
new file mode 100644
index 0000000..14f5a0e
--- /dev/null
+++ b/.github/lineage.yml
@@ -0,0 +1,5 @@
+---
+lineage:
+  skeleton:
+    remote-url: https://github.com/cisagov/skeleton-docker.git
+version: '1'
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..d064971
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,430 @@
+---
+name: build

+on:
+  push:
+    branches:
+      - '**'
+    tags:
+      - 'v*.*.*'
+  pull_request:
+  schedule:
+    - cron: '0 10 * * *' # every day at 10am
+  repository_dispatch:
+    # Respond to rebuild requests. See: https://github.com/cisagov/action-apb/
+    types: [apb]
+  workflow_dispatch:
+    inputs:
+      remote-shell:
+        description: "Debug with remote shell"
+        required: true
+        default: false
+      image-tag:
+        description: "Tag to apply to pushed images"
+        required: true
+        default: dispatch

+env:
+  BUILDX_CACHE_DIR: ~/.cache/buildx
+  CURL_CACHE_DIR: ~/.cache/curl
+  IMAGE_NAME: cisagov/pshtt_reporter
+  PIP_CACHE_DIR: ~/.cache/pip
+  # Not all of these platforms can be built in the six-hour time limit
+  # imposed by GitHub Actions, so we remove the three most obscure
+  # platforms.
+  # PLATFORMS: "linux/amd64,linux/arm/v6,linux/arm/v7,\
+  #   linux/arm64,linux/ppc64le,linux/s390x"
+  PLATFORMS: "linux/amd64,linux/arm/v7,linux/arm64"
+  PRE_COMMIT_CACHE_DIR: ~/.cache/pre-commit
+  RUN_TMATE: ${{ secrets.RUN_TMATE }}

+jobs:
+  lint:
+    # Checks out the source and runs pre-commit hooks. Detects coding errors
+    # and style deviations.
+    name: "Lint sources"
+    runs-on: ubuntu-latest
+    steps:
+      - id: setup-env
+        uses: cisagov/setup-env-github-action@develop
+      - uses: actions/checkout@v2
+      - id: setup-python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+      # We need the Go version and Go cache location for the actions/cache step,
+      # so the Go installation must happen before that.
+      - uses: actions/setup-go@v2
+        with:
+          go-version: '1.16'
+      - name: Store installed Go version
+        id: go-version
+        run: |
+          echo "::set-output name=version::"\
+            "$(go version | sed 's/^go version go\([0-9.]\+\) .*/\1/')"
+      - name: Lookup Go cache directory
+        id: go-cache
+        run: |
+          echo "::set-output name=dir::$(go env GOCACHE)"
+      - uses: actions/cache@v2
+        env:
+          BASE_CACHE_KEY: "${{ github.job }}-${{ runner.os }}-\
+            py${{ steps.setup-python.outputs.python-version }}-\
+            go${{ steps.go-version.outputs.version }}-\
+            packer${{ steps.setup-env.outputs.packer-version }}-\
+            tf${{ steps.setup-env.outputs.terraform-version }}-"
+        with:
+          # Note that the .terraform directory IS NOT included in the
+          # cache because if we were caching, then we would need to use
+          # the `-upgrade=true` option. This option blindly pulls down the
+          # latest modules and providers instead of checking to see if an
+          # update is required. That behavior defeats the benefits of caching,
+          # so there is no point in doing it for the .terraform directory.
+ path: | + ${{ env.PIP_CACHE_DIR }} + ${{ env.PRE_COMMIT_CACHE_DIR }} + ${{ env.CURL_CACHE_DIR }} + ${{ steps.go-cache.outputs.dir }} + key: "${{ env.BASE_CACHE_KEY }}\ + ${{ hashFiles('**/requirements-test.txt') }}-\ + ${{ hashFiles('**/requirements.txt') }}-\ + ${{ hashFiles('**/.pre-commit-config.yaml') }}" + restore-keys: | + ${{ env.BASE_CACHE_KEY }} + - name: Setup curl cache + run: mkdir -p ${{ env.CURL_CACHE_DIR }} + - name: Install Packer + env: + PACKER_VERSION: ${{ steps.setup-env.outputs.packer-version }} + run: | + PACKER_ZIP="packer_${PACKER_VERSION}_linux_amd64.zip" + curl --output ${{ env.CURL_CACHE_DIR }}/"${PACKER_ZIP}" \ + --time-cond ${{ env.CURL_CACHE_DIR }}/"${PACKER_ZIP}" \ + --location \ + "https://releases.hashicorp.com/packer/${PACKER_VERSION}/${PACKER_ZIP}" + sudo unzip -d /opt/packer \ + ${{ env.CURL_CACHE_DIR }}/"${PACKER_ZIP}" + sudo mv /usr/local/bin/packer /usr/local/bin/packer-default + sudo ln -s /opt/packer/packer /usr/local/bin/packer + - uses: hashicorp/setup-terraform@v1 + with: + terraform_version: ${{ steps.setup-env.outputs.terraform-version }} + - name: Install shfmt + env: + PACKAGE_URL: mvdan.cc/sh/v3/cmd/shfmt + PACKAGE_VERSION: ${{ steps.setup-env.outputs.shfmt-version }} + run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} + - name: Install Terraform-docs + env: + PACKAGE_URL: github.com/terraform-docs/terraform-docs + PACKAGE_VERSION: ${{ steps.setup-env.outputs.terraform-docs-version }} + run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install --upgrade --requirement requirements-test.txt + - name: Set up pre-commit hook environments + run: pre-commit install-hooks + - name: Run pre-commit on all files + run: pre-commit run --all-files + - name: Setup tmate debug session + uses: mxschmitt/action-tmate@v3 + if: env.RUN_TMATE + prepare: + # Calculates and publishes outputs that are used by other jobs. + # + # Outputs: + # created: + # The current date-time in RFC3339 format. + # repometa: + # The json metadata describing this repository. + # source_version: + # The source version as reported by the `bump_version.sh show` command. + # tags: + # A comma separated list of Docker tags to be applied to the images on + # Docker Hub. The tags will vary depending on: + # - The event that triggered the build. + # - The branch the build is based upon. + # - The git tag the build is based upon. + # + # When a build is based on a git tag of the form `v*.*.*` the image will + # be tagged on Docker Hub with multiple levels of version specificity. + # For example, a git tag of `v1.2.3+a` will generate Docker tags of + # `:1.2.3_a`, `:1.2.3`, `:1.2`, `:1`, and `:latest`. + # + # Builds targeting the default branch will be tagged with `:edge`. + # + # Builds from other branches will be tagged with the branch name. Solidi + # (`/` characters - commonly known as slashes) in branch names are + # replaced with hyphen-minuses (`-` characters) in the Docker tag. For + # more information about the solidus see these links: + # * https://www.compart.com/en/unicode/U+002F + # * https://en.wikipedia.org/wiki/Slash_(punctuation)#Encoding + # + # Builds triggered by a push event are tagged with a short hash in the + # form: sha-12345678 + # + # Builds triggered by a pull request are tagged with the pull request + # number in the form pr-123. + # + # Builds triggered using the GitHub GUI (workflow_dispatch) are tagged + # with the value specified by the user. 
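+    #
+    # As a concrete illustration of the rules above (matching the logic
+    # in the "Calculate output values" step below): a push of git tag
+    # `v1.2.3` yields the tags `:1.2.3`, `:1.2`, `:1`, `:latest`, and
+    # `:sha-` followed by the first eight characters of the commit SHA,
+    # with each tag duplicated under the `ghcr.io/` prefix for the
+    # GitHub Container Registry.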
+ # + # Scheduled builds are tagged with `:nightly`. + name: "Prepare build variables" + runs-on: ubuntu-latest + outputs: + created: ${{ steps.prep.outputs.created }} + repometa: ${{ steps.repo.outputs.result }} + source_version: ${{ steps.prep.outputs.source_version }} + tags: ${{ steps.prep.outputs.tags }} + steps: + - uses: actions/checkout@v2 + - name: Gather repository metadata + id: repo + uses: actions/github-script@v5 + with: + script: | + const repo = await github.rest.repos.get(context.repo) + return repo.data + - name: Calculate output values + id: prep + run: | + VERSION=noop + SEMVER="^v(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-((0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*))?(\+([0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*))?$" + if [ "${{ github.event_name }}" = "schedule" ]; then + VERSION=nightly + elif [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + VERSION=${{ github.event.inputs.image-tag }} + elif [[ $GITHUB_REF == refs/tags/* ]]; then + VERSION=${GITHUB_REF#refs/tags/} + elif [[ $GITHUB_REF == refs/heads/* ]]; then + VERSION=$(echo ${GITHUB_REF#refs/heads/} | sed -r 's#/+#-#g') + if [ "${{ github.event.repository.default_branch }}" = "$VERSION" ]; + then + VERSION=edge + fi + elif [[ $GITHUB_REF == refs/pull/* ]]; then + VERSION=pr-${{ github.event.number }} + fi + if [[ $VERSION =~ $SEMVER ]]; then + VERSION_NO_V=${VERSION#v} + MAJOR="${BASH_REMATCH[1]}" + MINOR="${BASH_REMATCH[2]}" + PATCH="${BASH_REMATCH[3]}" + TAGS="${IMAGE_NAME}:${VERSION_NO_V//+/_},${IMAGE_NAME}:${MAJOR}.${MINOR}.${PATCH},${IMAGE_NAME}:${MAJOR}.${MINOR},${IMAGE_NAME}:${MAJOR},${IMAGE_NAME}:latest" + else + TAGS="${IMAGE_NAME}:${VERSION}" + fi + if [ "${{ github.event_name }}" = "push" ]; then + TAGS="${TAGS},${IMAGE_NAME}:sha-${GITHUB_SHA::8}" + fi + for i in ${TAGS//,/ } + do + TAGS="${TAGS},ghcr.io/${i}" + done + echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ') + echo ::set-output name=source_version::$(./bump_version.sh show) + echo ::set-output name=tags::${TAGS} + echo tags=${TAGS} + - name: Setup tmate debug session + uses: mxschmitt/action-tmate@v3 + if: github.event.inputs.remote-shell == 'true' || env.RUN_TMATE + build: + # Builds a single test image for the native platform. This image is saved + # as an artifact and loaded by the test job. + name: "Build test image" + runs-on: ubuntu-latest + needs: [prepare] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Cache Docker layers + uses: actions/cache@v2 + env: + BASE_CACHE_KEY: buildx-${{ runner.os }}- + with: + path: ${{ env.BUILDX_CACHE_DIR }} + key: ${{ env.BASE_CACHE_KEY }}${{ github.sha }} + restore-keys: | + ${{ env.BASE_CACHE_KEY }} + - name: Create dist directory + run: mkdir -p dist + - name: Build image + id: docker_build + uses: docker/build-push-action@v2 + with: + build-args: | + VERSION=${{ needs.prepare.outputs.source_version }} + cache-from: type=local,src=${{ env.BUILDX_CACHE_DIR }} + cache-to: type=local,dest=${{ env.BUILDX_CACHE_DIR }} + context: . 
+ file: ./Dockerfile + outputs: type=docker,dest=dist/image.tar + tags: ${{ env.IMAGE_NAME }}:latest # not to be pushed + # For a list of pre-defined annotation keys and value types see: + # https://github.com/opencontainers/image-spec/blob/master/annotations.md + labels: "\ + org.opencontainers.image.created=${{ + needs.prepare.outputs.created }} + + org.opencontainers.image.description=${{ + fromJson(needs.prepare.outputs.repometa).description }} + + org.opencontainers.image.licenses=${{ + fromJson(needs.prepare.outputs.repometa).license.spdx_id }} + + org.opencontainers.image.revision=${{ github.sha }} + + org.opencontainers.image.source=${{ + fromJson(needs.prepare.outputs.repometa).clone_url }} + + org.opencontainers.image.title=${{ + fromJson(needs.prepare.outputs.repometa).name }} + + org.opencontainers.image.url=${{ + fromJson(needs.prepare.outputs.repometa).html_url }} + + org.opencontainers.image.version=${{ + needs.prepare.outputs.source_version }}" + - name: Compress image + run: gzip dist/image.tar + - name: Upload artifacts + uses: actions/upload-artifact@v2 + with: + name: dist + path: dist + - name: Setup tmate debug session + uses: mxschmitt/action-tmate@v3 + if: env.RUN_TMATE + test: + # Executes tests on the single-platform image created in the "build" job. + name: "Test image" + runs-on: ubuntu-latest + needs: [build] + steps: + - uses: actions/checkout@v2 + - id: setup-python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: Cache testing environments + uses: actions/cache@v2 + env: + BASE_CACHE_KEY: "${{ github.job }}-${{ runner.os }}-\ + py${{ steps.setup-python.outputs.python-version }}-" + with: + path: ${{ env.PIP_CACHE_DIR }} + key: "${{ env.BASE_CACHE_KEY }}\ + ${{ hashFiles('**/requirements-test.txt') }}-\ + ${{ hashFiles('**/requirements.txt') }}" + restore-keys: | + ${{ env.BASE_CACHE_KEY }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install --upgrade --requirement requirements-test.txt + - name: Download docker image artifact + uses: actions/download-artifact@v2 + with: + name: dist + path: dist + - name: Load docker image + run: docker load < dist/image.tar.gz + - name: Run tests + env: + RELEASE_TAG: ${{ github.event.release.tag_name }} + run: pytest --runslow + - name: Setup tmate debug session + uses: mxschmitt/action-tmate@v3 + if: env.RUN_TMATE + build-push-all: + # Builds the final set of images for each of the platforms listed in + # PLATFORMS environment variable. These images are tagged with the Docker + # tags calculated in the "prepare" job and pushed to Docker Hub and the + # GitHub Container Registry. The contents of README.md are pushed as the + # image's description to Docker Hub. This job is skipped when the + # triggering event is a pull request. 
+ name: "Build and push all platforms" + runs-on: ubuntu-latest + needs: [lint, prepare, test] + if: github.event_name != 'pull_request' + steps: + - name: Login to Docker Hub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: Login to GitHub Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Checkout + uses: actions/checkout@v2 + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Cache Docker layers + uses: actions/cache@v2 + env: + BASE_CACHE_KEY: buildx-${{ runner.os }}- + with: + path: ${{ env.BUILDX_CACHE_DIR }} + key: ${{ env.BASE_CACHE_KEY }}${{ github.sha }} + restore-keys: | + ${{ env.BASE_CACHE_KEY }} + - name: Create cross-platform support Dockerfile-x + run: ./buildx-dockerfile.sh + - name: Build and push platform images to registries + id: docker_build + uses: docker/build-push-action@v2 + with: + build-args: | + VERSION=${{ needs.prepare.outputs.source_version }} + cache-from: type=local,src=${{ env.BUILDX_CACHE_DIR }} + cache-to: type=local,dest=${{ env.BUILDX_CACHE_DIR }} + context: . + file: ./Dockerfile-x + platforms: ${{ env.PLATFORMS }} + push: true + tags: ${{ needs.prepare.outputs.tags }} + # For a list of pre-defined annotation keys and value types see: + # https://github.com/opencontainers/image-spec/blob/master/annotations.md + labels: "\ + org.opencontainers.image.created=${{ + needs.prepare.outputs.created }} + + org.opencontainers.image.description=${{ + fromJson(needs.prepare.outputs.repometa).description }} + + org.opencontainers.image.licenses=${{ + fromJson(needs.prepare.outputs.repometa).license.spdx_id }} + + org.opencontainers.image.revision=${{ github.sha }} + + org.opencontainers.image.source=${{ + fromJson(needs.prepare.outputs.repometa).clone_url }} + + org.opencontainers.image.title=${{ + fromJson(needs.prepare.outputs.repometa).name }} + + org.opencontainers.image.url=${{ + fromJson(needs.prepare.outputs.repometa).html_url }} + + org.opencontainers.image.version=${{ + needs.prepare.outputs.source_version }}" + - name: Publish README.md to Docker Hub + env: + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + run: ./push_readme.sh + - name: Setup tmate debug session + uses: mxschmitt/action-tmate@v3 + if: env.RUN_TMATE diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 0000000..33d1999 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,69 @@ +--- + +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +name: "CodeQL" + +on: + push: + # Dependabot triggered push events have read-only access, but uploading code + # scanning requires write access. 
+ branches-ignore: [dependabot/**] + pull_request: + # The branches below must be a subset of the branches above + branches: [develop] + schedule: + - cron: '0 21 * * 6' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + # Override automatic language detection by changing the below list + # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', + # 'python'] + language: ['python'] + # Learn more... + # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a + # config file. By default, queries listed here will override any + # specified in a config file. Prefix the list here with "+" to use + # these queries and those in the config file. queries: + # ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or + # Java). If this step fails, then you should remove it and run the build + # manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 + + # ℹī¸ Command-line programs to run using the OS shell. + # 📚 https://git.io/JvXDl + + # ✏ī¸ If the Autobuild fails above, remove it and uncomment the following + # three lines and modify them (or add more) to build your code if your + # project uses a compiled language + + # - run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 diff --git a/.gitignore b/.gitignore index 7981dea..b1efc19 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,12 @@ +# This file specifies intentionally untracked files that Git should ignore. +# Files already tracked by Git are not affected. 
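+# (A file that is already tracked can be untracked with, for example,
+# `git rm --cached <file>`.)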
+# See: https://git-scm.com/docs/gitignore + +## Docker ## +Dockerfile-x + +## Python ## +__pycache__ +.mypy_cache +.pytest_cache .python-version -.DS_Store diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..46d45f3 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,10 @@ +[settings] +combine_star=true +force_sort_within_sections=true + +import_heading_stdlib=Standard Python Libraries +import_heading_thirdparty=Third-Party Libraries +import_heading_firstparty=cisagov Libraries + +# Run isort under the black profile to align with our other Python linting +profile=black diff --git a/.lgtm.yml b/.lgtm.yml new file mode 100644 index 0000000..8950263 --- /dev/null +++ b/.lgtm.yml @@ -0,0 +1,8 @@ +--- +extraction: + python: + python_setup: + version: 3 + requirements_files: + - requirements-test.txt + setup_py: false diff --git a/.mdl_config.yaml b/.mdl_config.yaml new file mode 100644 index 0000000..b36f943 --- /dev/null +++ b/.mdl_config.yaml @@ -0,0 +1,50 @@ +--- + +# Default state for all rules +default: true + +# MD003/heading-style/header-style - Heading style +MD003: + # Enforce the ATX-closed style of header + style: "atx_closed" + +# MD004/ul-style - Unordered list style +MD004: + # Enforce dashes for unordered lists + style: "dash" + +# MD013/line-length - Line length +MD013: + # Do not enforce for code blocks + code_blocks: false + # Do not enforce for tables + tables: false + +# MD024/no-duplicate-heading/no-duplicate-header - Multiple headings with the +# same content +MD024: + # Allow headers with the same content as long as they are not in the same + # parent heading + allow_different_nesting: true + +# MD029/ol-prefix - Ordered list item prefix +MD029: + # Enforce the `1.` style for ordered lists + style: "one" + +# MD033/no-inline-html - Inline HTML +MD033: + # The h1 and img elements are allowed to permit header images + allowed_elements: + - h1 + - img + +# MD035/hr-style - Horizontal rule style +MD035: + # Enforce dashes for horizontal rules + style: "---" + +# MD046/code-block-style Code block style +MD046: + # Enforce the fenced style for code blocks + style: "fenced" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..09f9cec --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,141 @@ +--- +default_language_version: + # force all unspecified python hooks to run python3 + python: python3 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.1.0 + hooks: + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-json + - id: check-merge-conflict + - id: check-toml + - id: check-xml + - id: debug-statements + - id: detect-aws-credentials + args: + - --allow-missing-credentials + - id: detect-private-key + exclude: src/secrets/privkey.pem + - id: end-of-file-fixer + exclude: files/(issue|motd) + - id: mixed-line-ending + args: + - --fix=lf + - id: pretty-format-json + args: + - --autofix + - id: requirements-txt-fixer + - id: trailing-whitespace + + # Text file hooks + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.30.0 + hooks: + - id: markdownlint + args: + - --config=.mdl_config.yaml + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v2.5.1 + hooks: + - id: prettier + - repo: https://github.com/adrienverge/yamllint + rev: v1.26.3 + hooks: + - id: yamllint + args: + - --strict + + # pre-commit hooks + - repo: https://github.com/pre-commit/pre-commit + rev: v2.16.0 + hooks: + - id: validate_manifest + + # Shell script hooks + - repo: 
https://github.com/cisagov/pre-commit-shfmt
+    rev: v0.0.2
+    hooks:
+      - id: shfmt
+        args:
+          # Indent by two spaces
+          - -i
+          - '2'
+          # Binary operators may start a line
+          - -bn
+          # Switch cases are indented
+          - -ci
+          # Redirect operators are followed by a space
+          - -sr
+  - repo: https://github.com/detailyang/pre-commit-shell
+    rev: 1.0.5
+    hooks:
+      - id: shell-lint

+  # Python hooks
+  - repo: https://github.com/PyCQA/bandit
+    rev: 1.7.1
+    hooks:
+      - id: bandit
+        name: bandit (tests tree)
+        files: tests
+        args:
+          - --config=.bandit.yml
+  # Run bandit on everything but the tests directory
+  - repo: https://github.com/PyCQA/bandit
+    rev: 1.7.1
+    hooks:
+      - id: bandit
+        name: bandit (everything else)
+        exclude: tests
+  - repo: https://github.com/psf/black
+    rev: 21.12b0
+    hooks:
+      - id: black
+  - repo: https://gitlab.com/pycqa/flake8
+    rev: 3.9.2
+    hooks:
+      - id: flake8
+        additional_dependencies:
+          - flake8-docstrings
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v0.931
+    hooks:
+      - id: mypy
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v2.31.0
+    hooks:
+      - id: pyupgrade

+  # Ansible hooks
+  - repo: https://github.com/ansible-community/ansible-lint
+    rev: v5.3.2
+    hooks:
+      - id: ansible-lint
+      # files: molecule/default/playbook.yml

+  # Terraform hooks
+  - repo: https://github.com/antonbabenko/pre-commit-terraform
+    rev: v1.62.3
+    hooks:
+      - id: terraform_fmt
+      - id: terraform_validate

+  # Docker hooks
+  - repo: https://github.com/IamTheFij/docker-pre-commit
+    rev: v2.0.1
+    hooks:
+      - id: docker-compose-check

+  # Packer hooks
+  - repo: https://github.com/cisagov/pre-commit-packer
+    rev: v0.0.2
+    hooks:
+      - id: packer_validate
+      - id: packer_fmt
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 0000000..738d402
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1,7 @@
+# Already being linted by pretty-format-json
+*.json
+# Already being linted by mdl
+*.md
+# Already being linted by yamllint
+*.yaml
+*.yml
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index d4ed481..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-language: minimal
-sudo: yes
-dist: xenial
-
-services:
-  - docker
-
-env:
-  global:
-    - IMAGE_NAME=cisagov/pshtt_reporter
-    - DOCKER_USER=jsf9k
-    - secure: "CQqWTdBgIsfDTWy5v99YVwPPqunjGRizq/lTlpP4JMu7JFUsZ7ht69XAXEsaQIVT/cqxmTaa03Zgz+k6Cq4XVtV+MmgMpwNTtTt3lEh7TZGNNU6pTB/1GOC0Ak0U95JpZGELsIxKHoe85KHqzjs6eMP9bYK2ESyfETpoGjq7rVYSojUkBkj4AKu6/MxzOaLe5LVHn7yqVDZIK2CQzTq/qvJ47ZewNuHuO4aPbflWB0U3XtmiORNKkPtB/jebxSO7EJnZnsflbqbFb7uUp1UDX7CoRUYb7NL+wV2dqd7MkC92zxEhKndH26V8V3IsApfPXnaxSoDISWjmGxfZ8sdqIljVyPsNyaY8YPuBNVJX8heOEAvqo2gGtbzoNlgAC74UjoUuGGYk7HIbNLchFE8S/iCye/GydlzpXs2mLGMge0jA3MQTNE4R+x2oJukcYtd4n7ubsprzxuuo+onfdoWb1VEz2enCvEYr5suZj7bYxN/rh52UzWz0rdvdTChqjVuMPhmABFdwjIWPe59Hh0Ad6VC18aswO8q902paM/SBRyVCzmJ8q5RWG6cEj/yHG1tvbublVnWjwW69jKWIMm+XWmmeMNGLB742rPDu7JNtadxVlndPwRVCnsCHWhNCnm+A5D2l+6nCqpT4Ow4/jle5LLnvwm37XjC0lcSQpW+KE2M="
-
-before_install:
-  - sudo apt-get install -y shellcheck
-
-script:
-  - shellcheck bump_version.sh tag.sh travis_scripts/*.sh
-  - travis_scripts/build_docker_image.sh
-
-deploy:
-  - provider: script
-    script: travis_scripts/deploy_to_docker_hub.sh
-    on:
-      tags: true
diff --git a/.yamllint b/.yamllint
new file mode 100644
index 0000000..7ed00eb
--- /dev/null
+++ b/.yamllint
@@ -0,0 +1,7 @@
+---
+extends: default

+rules:
+  # yamllint doesn't like when we use yes and no for true and false,
+  # but that's pretty
standard in Ansible. + truthy: disable diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..68c4579 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,165 @@ +# Welcome # + +We're so glad you're thinking about contributing to this open source +project! If you're unsure or afraid of anything, just ask or submit +the issue or pull request anyway. The worst that can happen is that +you'll be politely asked to change something. We appreciate any sort +of contribution, and don't want a wall of rules to get in the way of +that. + +Before contributing, we encourage you to read our CONTRIBUTING policy +(you are here), our [LICENSE](LICENSE), and our [README](README.md), +all of which should be in this repository. + +## Issues ## + +If you want to report a bug or request a new feature, the most direct +method is to [create an +issue](https://github.com/cisagov/pshtt_reporter/issues) in this +repository. We recommend that you first search through existing +issues (both open and closed) to check if your particular issue has +already been reported. If it has then you might want to add a comment +to the existing issue. If it hasn't then feel free to create a new +one. + +## Pull requests ## + +If you choose to [submit a pull +request](https://github.com/cisagov/pshtt_reporter/pulls), you will +notice that our continuous integration (CI) system runs a fairly +extensive set of linters and syntax checkers. Your pull request may +fail these checks, and that's OK. If you want you can stop there and +wait for us to make the necessary corrections to ensure your code +passes the CI checks. + +If you want to make the changes yourself, or if you want to become a +regular contributor, then you will want to set up +[pre-commit](https://pre-commit.com/) on your local machine. Once you +do that, the CI checks will run locally before you even write your +commit message. This speeds up your development cycle considerably. + +### Setting up pre-commit ### + +There are a few ways to do this, but we prefer to use +[`pyenv`](https://github.com/pyenv/pyenv) and +[`pyenv-virtualenv`](https://github.com/pyenv/pyenv-virtualenv) to +create and manage a Python virtual environment specific to this +project. + +If you already have `pyenv` and `pyenv-virtualenv` configured you can +take advantage of the `setup-env` tool in this repo to automate the +entire environment configuration process. + +```console +./setup-env +``` + +Otherwise, follow the steps below to manually configure your +environment. + +#### Installing and using `pyenv` and `pyenv-virtualenv` #### + +On the Mac, we recommend installing [brew](https://brew.sh/). Then +installation is as simple as `brew install pyenv pyenv-virtualenv` and +adding this to your profile: + +```bash +export PYENV_ROOT="$HOME/.pyenv" +export PATH="$PYENV_ROOT/bin:$PATH" +eval "$(pyenv init --path)" +eval "$(pyenv init -)" +eval "$(pyenv virtualenv-init -)" +``` + +For Linux, Windows Subsystem for Linux (WSL), or on the Mac (if you +don't want to use `brew`) you can use +[pyenv/pyenv-installer](https://github.com/pyenv/pyenv-installer) to +install the necessary tools. Before running this ensure that you have +installed the prerequisites for your platform according to the +[`pyenv` wiki +page](https://github.com/pyenv/pyenv/wiki/common-build-problems). + +On WSL you should treat your platform as whatever Linux distribution +you've chosen to install. 
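
At the time of this writing, the `pyenv-installer` project documents a
one-line installation command; it is reproduced here for convenience,
but you should check that project's README for the current
instructions:

```console
curl https://pyenv.run | bash
```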
+ +Once you have installed `pyenv` you will need to add the following +lines to your `.bash_profile` (or `.profile`): + +```bash +export PYENV_ROOT="$HOME/.pyenv" +export PATH="$PYENV_ROOT/bin:$PATH" +eval "$(pyenv init --path)" +``` + +and then add the following lines to your `.bashrc`: + +```bash +eval "$(pyenv init -)" +eval "$(pyenv virtualenv-init -)" +``` + +If you want more information about setting up `pyenv` once installed, please run + +```console +pyenv init +``` + +and + +```console +pyenv virtualenv-init +``` + +for the current configuration instructions. + +If you are using a shell other than `bash` you should follow the +instructions that the `pyenv-installer` script outputs. + +You will need to reload your shell for these changes to take effect so +you can begin to use `pyenv`. + +For a list of Python versions that are already installed and ready to +use with `pyenv`, use the command `pyenv versions`. To see a list of +the Python versions available to be installed and used with `pyenv` +use the command `pyenv install --list`. You can read more +[here](https://github.com/pyenv/pyenv/blob/master/COMMANDS.md) about +the many things that `pyenv` can do. See +[here](https://github.com/pyenv/pyenv-virtualenv#usage) for the +additional capabilities that pyenv-virtualenv adds to the `pyenv` +command. + +#### Creating the Python virtual environment #### + +Once `pyenv` and `pyenv-virtualenv` are installed on your system, you +can create and configure the Python virtual environment with these +commands: + +```console +cd pshtt_reporter +pyenv virtualenv pshtt_reporter +pyenv local pshtt_reporter +pip install --requirement requirements-dev.txt +``` + +#### Installing the pre-commit hook #### + +Now setting up pre-commit is as simple as: + +```console +pre-commit install +``` + +At this point the pre-commit checks will run against any files that +you attempt to commit. If you want to run the checks against the +entire repo, just execute `pre-commit run --all-files`. + +## Public domain ## + +This project is in the public domain within the United States, and +copyright and related rights in the work worldwide are waived through +the [CC0 1.0 Universal public domain +dedication](https://creativecommons.org/publicdomain/zero/1.0/). + +All contributions to this project will be released under the CC0 +dedication. By submitting a pull request, you are agreeing to comply +with this waiver of copyright interest. diff --git a/Dockerfile b/Dockerfile index 6738dfd..e7219c4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,30 @@ -FROM python:3.6-stretch -MAINTAINER Shane Frasier +FROM python:3.10.2-bullseye + +# For a list of pre-defined annotation keys and value types see: +# https://github.com/opencontainers/image-spec/blob/master/annotations.md +# Note: Additional labels are added by the build workflow. 
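+# (For example, the workflow in .github/workflows/build.yml supplies
+# org.opencontainers.image.created, org.opencontainers.image.version,
+# and several other labels at build time.)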
+LABEL org.opencontainers.image.authors="jeremy.frasier@cisa.dhs.gov" +LABEL org.opencontainers.image.vendor="Cybersecurity and Infrastructure Security Agency" + +### +# Setup the user and its home directory +### + +ARG CISA_GID=421 +ARG CISA_UID=${CISA_GID} +ENV CISA_USER="cisa" +ENV CISA_GROUP=${CISA_USER} +ENV CISA_HOME="/home/cisa" + +### +# Create unprivileged user +### +RUN groupadd --system --gid ${CISA_GID} ${CISA_GROUP} +RUN useradd --system --uid ${CISA_UID} --gid ${CISA_GROUP} --comment "${CISA_USER} user" ${CISA_USER} + +### +# Install everything we need +### ### # Dependencies @@ -44,50 +69,58 @@ RUN apt-get update -qq \ texlive-xetex \ fonts-lmodern \ lmodern \ - texlive-math-extra \ + texlive-science \ fontconfig \ redis-tools # Setup texlive latex stuff. RUN tlmgr init-usertree +### # Install requirements for report generation # +# Make sure pip and setuptools are the latest versions +# +# Note that we use pip --no-cache-dir to avoid writing to a local +# cache. This results in a smaller final image, at the cost of +# slightly longer install times. +# # numpy seems to be required to build basemap's wheel, so we'll # install it first. -RUN pip install --upgrade setuptools pip \ - && pip install --upgrade numpy \ - && pip install --upgrade \ - pypdf2 \ +# +# Note that matplotlib.basemap is currently incompatible with +# matplotlib 3.x. +RUN pip install --no-cache-dir --upgrade pip setuptools wheel \ + && pip install --no-cache-dir --upgrade numpy \ + && pip install --no-cache-dir --upgrade \ + chevron \ + docopt \ + geos \ matplotlib \ - pystache \ + https://github.com/cisagov/mongo-db-from-config/tarball/develop \ pandas \ - geos \ - docopt \ - https://github.com/cisagov/mongo-db-from-config/tarball/develop + pypdf2 ### -# Create unprivileged User +# Clean up aptitude cruft ### -ENV REPORTER_HOME=/home/reporter -RUN groupadd -r reporter \ - && useradd -r -c "Reporter user" -g reporter reporter - -# It would be nice to get rid of some build dependencies at this point +RUN apt-get --quiet --quiet clean +RUN rm -rf /var/lib/apt/lists/* -# Clean up aptitude cruft -RUN apt-get clean && rm -rf /var/lib/apt/lists/* +### +# Setup working directory and entrypoint +### # Put this just before we change users because the copy (and every -# step after it) will always be rerun by docker, but we need to be +# step after it) will always be rerun by Docker, but we need to be # root for the chown command. -COPY . 
$REPORTER_HOME -RUN chown -R reporter:reporter ${REPORTER_HOME} +COPY src ${CISA_HOME} +RUN chown -R ${CISA_USER}:${CISA_GROUP} ${CISA_HOME} ### # Prepare to Run ### # Right now we need to run as root for the font stuff -# USER reporter:reporter -WORKDIR $REPORTER_HOME +# USER ${CISA_USER}:${CISA_GROUP} +WORKDIR ${CISA_HOME} ENTRYPOINT ["./report.sh"] diff --git a/LICENSE.md b/LICENSE similarity index 100% rename from LICENSE.md rename to LICENSE diff --git a/README.md b/README.md index 83c53fd..4cd1a28 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,251 @@ -# CISA PSHTT Reporter :notebook: :file_folder: # +# PSHTT Reporter # -[![Build Status](https://travis-ci.com/cisagov/pshtt_reporter.svg?branch=develop)](https://travis-ci.com/cisagov/pshtt_reporter) -[![Total alerts](https://img.shields.io/lgtm/alerts/g/cisagov/pshtt_reporter.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/pshtt_reporter/alerts/) -[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/cisagov/pshtt_reporter.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/pshtt_reporter/context:python) -[![Language grade: JavaScript](https://img.shields.io/lgtm/grade/javascript/g/cisagov/pshtt_reporter.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/pshtt_reporter/context:javascript) +[![GitHub Build Status](https://github.com/cisagov/pshtt_reporter/workflows/build/badge.svg)](https://github.com/cisagov/pshtt_reporter/actions/workflows/build.yml) +[![CodeQL](https://github.com/cisagov/pshtt_reporter/workflows/CodeQL/badge.svg)](https://github.com/cisagov/pshtt_reporter/actions/workflows/codeql-analysis.yml) +[![Known Vulnerabilities](https://snyk.io/test/github/cisagov/pshtt_reporter/badge.svg)](https://snyk.io/test/github/cisagov/pshtt_reporter) + +## Docker Image ## + +[![Docker Pulls](https://img.shields.io/docker/pulls/cisagov/pshtt_reporter)](https://hub.docker.com/r/cisagov/pshtt_reporter) +[![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/cisagov/pshtt_reporter)](https://hub.docker.com/r/cisagov/pshtt_reporter) +[![Platforms](https://img.shields.io/badge/platforms-amd64%20%7C%20arm%2Fv6%20%7C%20arm%2Fv7%20%7C%20arm64%20%7C%20ppc64le%20%7C%20s390x-blue)](https://hub.docker.com/r/cisagov/pshtt_reporter/tags) This is a Docker container that creates PDF reports for individual second-level domains using data collected via [pshtt](https://github.com/cisagov/pshtt) scans. This Docker container is intended to be run via -[orchestrator](https://github.com/cisagov/orchestrator). +[cisagov/orchestrator](https://github.com/cisagov/orchestrator). + +__N.B.:__ The secrets in the `src/secrets` directory are only used +when testing via the `docker-compose.yml` composition. Normally this +Docker container is run via the Docker composition in +[cisagov/orchestrator](https://github.com/cisagov/orchestrator), which +expects the secrets in a different location. + +## Running ## + +### Running with Docker ### + +To run the `cisagov/pshtt_reporter` image via Docker: + +```console +docker run cisagov/pshtt_reporter:1.2.0 +``` + +### Running with Docker Compose ### + +1. Create a `docker-compose.yml` file similar to the one below to use [Docker Compose](https://docs.docker.com/compose/). + + ```yaml + --- + version: "3.7" + + services: + pshtt_reporter: + image: cisagov/pshtt_reporter:1.2.0 + volumes: + - type: bind + source: + target: /home/cisa/shared + ``` + +1. 
Start the container and detach:

   ```console
   docker-compose up --detach
   ```

## Using secrets with your container ##

This container also supports passing sensitive values via [Docker
secrets](https://docs.docker.com/engine/swarm/secrets/). Passing
sensitive values like credentials via secrets can be more secure than
passing them via environment variables. See the
[secrets](#secrets) section below for a table of all supported secret
files.

1. To use secrets, create a `database_creds.yml` file in [this
   format](https://github.com/cisagov/mongo-db-from-config#usage):

   ```yaml
   ---
   version: '1'

   database:
     name: cyhy
     uri: mongodb://readonly:the_password@cyhy.example.com:27017/cyhy
   ```

1. Then add the secrets to your `docker-compose.yml` file:

   ```yaml
   ---
   version: "3.7"

   secrets:
     database_creds:
       file: database_creds.yml

   services:
     pshtt_reporter:
       image: cisagov/pshtt_reporter:1.2.0
       volumes:
         - type: bind
           source:
           target: /home/cisa/shared
       secrets:
         - source: database_creds
           target: database_creds.yml
   ```

## Updating your container ##

### Docker Compose ###

1. Pull the new image from Docker Hub:

   ```console
   docker-compose pull
   ```

1. Recreate the running container by following the [previous
   instructions](#running-with-docker-compose):

   ```console
   docker-compose up --detach
   ```

### Docker ###

1. Stop the running container:

   ```console
   docker stop
   ```

1. Pull the new image:

   ```console
   docker pull cisagov/pshtt_reporter:1.2.0
   ```

1. Recreate and run the container by following the [previous
   instructions](#running-with-docker).

## Image tags ##

The images of this container are tagged with [semantic
versions](https://semver.org) of the underlying project that they
containerize. It is recommended that most users use a version tag
(e.g. `:1.2.0`).

| Image:tag | Description |
|-----------|-------------|
|`cisagov/pshtt_reporter:1.2.0`| An exact release version. |
|`cisagov/pshtt_reporter:1.2`| The most recent release matching the major and minor version numbers. |
|`cisagov/pshtt_reporter:1`| The most recent release matching the major version number. |
|`cisagov/pshtt_reporter:edge` | The most recent image built from a merge into the `develop` branch of this repository. |
|`cisagov/pshtt_reporter:nightly` | A nightly build of the `develop` branch of this repository. |
|`cisagov/pshtt_reporter:latest`| The most recent release image pushed to a container registry. Pulling an image using the `:latest` tag [should be avoided](https://vsupalov.com/docker-latest-tag/). |

See the [tags
tab](https://hub.docker.com/r/cisagov/pshtt_reporter/tags) on
Docker Hub for a list of all the supported tags.

## Volumes ##

| Mount point | Purpose |
|-------------|---------|
| /home/cisa/shared | Output |

## Ports ##

There are no ports exposed by this container.

## Environment variables ##

### Required ###

There are no required environment variables.

### Optional ###

There are no optional environment variables.
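
Note that, since this container writes its output solely to the
`/home/cisa/shared` volume described above, a standalone `docker run`
invocation will generally bind a host directory to that mount point
(the host path below is purely illustrative):

```console
docker run --volume /path/on/host:/home/cisa/shared \
  cisagov/pshtt_reporter:1.2.0
```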
+ + + +## Secrets ## + +| Filename | Purpose | +|---------------|----------------------| +| database_creds.yml | Cyber Hygiene read-only database credentials in [this format](https://github.com/cisagov/mongo-db-from-config#usage) | + +## Building from source ## + +Build the image locally using this git repository as the [build context](https://docs.docker.com/engine/reference/commandline/build/#git-repositories): + +```console +docker build \ + --tag cisagov/pshtt_reporter:1.2.0 \ + https://github.com/cisagov/pshtt_reporter.git#develop +``` + +## Cross-platform builds ## + +To create images that are compatible with other platforms, you can use the +[`buildx`](https://docs.docker.com/buildx/working-with-buildx/) feature of +Docker: + +1. Copy the project to your machine using the `Code` button above + or the command line: + + ```console + git clone https://github.com/cisagov/pshtt_reporter.git + cd pshtt_reporter + ``` + +1. Create the `Dockerfile-x` file with `buildx` platform support: + + ```console + ./buildx-dockerfile.sh + ``` + +1. Build the image using `buildx`: + + ```console + docker buildx build \ + --file Dockerfile-x \ + --platform linux/amd64 \ + --output type=docker \ + --tag cisagov/pshtt_reporter:1.2.0 . + ``` + +## Contributing ## + +We welcome contributions! Please see [`CONTRIBUTING.md`](CONTRIBUTING.md) for +details. ## License ## -This project is in the worldwide [public domain](LICENSE.md). +This project is in the worldwide [public domain](LICENSE). This project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through diff --git a/buildx-dockerfile.sh b/buildx-dockerfile.sh new file mode 100755 index 0000000..46710e9 --- /dev/null +++ b/buildx-dockerfile.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# Create a Dockerfile suitable for a multi-platform build using buildx +# See: https://docs.docker.com/buildx/working-with-buildx/ + +set -o nounset +set -o errexit +set -o pipefail + +DOCKERFILE=Dockerfile +DOCKERFILEX=Dockerfile-x + +# We don't want this expression to expand. 
+# shellcheck disable=SC2016 +sed 's/^FROM /FROM --platform=$TARGETPLATFORM /g' < $DOCKERFILE > $DOCKERFILEX diff --git a/bump_version.sh b/bump_version.sh index 0cac5b7..48aa231 100755 --- a/bump_version.sh +++ b/bump_version.sh @@ -6,42 +6,46 @@ set -o nounset set -o errexit set -o pipefail -VERSION_FILE=version.txt +VERSION_FILE=src/version.txt +README_FILE=README.md HELP_INFORMATION="bump_version.sh (show|major|minor|patch|prerelease|build|finalize)" -old_version=$(cat $VERSION_FILE) +old_version=$(sed -n "s/^__version__ = \"\(.*\)\"$/\1/p" $VERSION_FILE) -if [ $# -ne 1 ] -then - echo "$HELP_INFORMATION" +if [ $# -ne 1 ]; then + echo "$HELP_INFORMATION" else - case $1 in - major|minor|patch|prerelease|build) - new_version=$(python -c "import semver; print(semver.bump_$1('$old_version'))") - echo Changing version from "$old_version" to "$new_version" - tmp_file=/tmp/version.$$ - sed "s/$old_version/$new_version/" $VERSION_FILE > $tmp_file - mv $tmp_file $VERSION_FILE - git add $VERSION_FILE - git commit -m"Bump version from $old_version to $new_version" - git push - ;; - finalize) - new_version=$(python -c "import semver; print(semver.finalize_version('$old_version'))") - echo Changing version from "$old_version" to "$new_version" - tmp_file=/tmp/version.$$ - sed "s/$old_version/$new_version/" $VERSION_FILE > $tmp_file - mv $tmp_file $VERSION_FILE - git add $VERSION_FILE - git commit -m"Finalize version from $old_version to $new_version" - git push - ;; - show) - echo "$old_version" - ;; - *) - echo "$HELP_INFORMATION" - ;; - esac + case $1 in + major | minor | patch | prerelease | build) + new_version=$(python -c "import semver; print(semver.bump_$1('$old_version'))") + echo Changing version from "$old_version" to "$new_version" + tmp_file=/tmp/version.$$ + sed "s/$old_version/$new_version/" $VERSION_FILE > $tmp_file + mv $tmp_file $VERSION_FILE + sed "s/$old_version/$new_version/" $README_FILE > $tmp_file + mv $tmp_file $README_FILE + git add $VERSION_FILE $README_FILE + git commit -m"Bump version from $old_version to $new_version" + git push + ;; + finalize) + new_version=$(python -c "import semver; print(semver.finalize_version('$old_version'))") + echo Changing version from "$old_version" to "$new_version" + tmp_file=/tmp/version.$$ + sed "s/$old_version/$new_version/" $VERSION_FILE > $tmp_file + mv $tmp_file $VERSION_FILE + sed "s/$old_version/$new_version/" $README_FILE > $tmp_file + mv $tmp_file $README_FILE + git add $VERSION_FILE $README_FILE + git commit -m"Finalize version from $old_version to $new_version" + git push + ;; + show) + echo "$old_version" + ;; + *) + echo "$HELP_INFORMATION" + ;; + esac fi diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..ce76023 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,25 @@ +--- +version: "3.7" + +# This docker-compose file is used to build and test the container + +secrets: + database_creds: + file: ./src/secrets/database_creds.yml + +services: + pshtt_reporter: + # Run the container normally + build: + context: . 
+      dockerfile: Dockerfile
+    depends_on:
+      - redis
+    image: cisagov/pshtt_reporter
+    init: true
+    restart: "no"
+    secrets:
+      - source: database_creds
+        target: database_creds.yml
+  redis:
+    image: redis:alpine
diff --git a/push_readme.sh b/push_readme.sh
new file mode 100755
index 0000000..29b12aa
--- /dev/null
+++ b/push_readme.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash

+# Push the README.md file to the Docker Hub repository

+# Requires the following environment variables to be set:
+# DOCKER_PASSWORD, DOCKER_USERNAME, IMAGE_NAME

+set -o nounset
+set -o errexit
+set -o pipefail

+echo "Logging in and requesting JWT..."
+token=$(curl --silent --request POST \
+  --header "Content-Type: application/json" \
+  --data \
+  '{"username": "'"$DOCKER_USERNAME"'", "password": "'"$DOCKER_PASSWORD"'"}' \
+  https://hub.docker.com/v2/users/login/ | jq --raw-output .token)

+echo "Pushing README file..."
+code=$(jq --null-input --arg msg "$(< README.md)" \
+  '{"registry":"registry-1.docker.io","full_description": $msg }' \
+  | curl --silent --output /dev/null --location --write-out "%{http_code}" \
+    https://hub.docker.com/v2/repositories/"${IMAGE_NAME}"/ \
+    --data @- --request PATCH \
+    --header "Content-Type: application/json" \
+    --header "Authorization: JWT ${token}")

+if [[ "${code}" = "200" ]]; then
+  printf "Successfully pushed README to Docker Hub\n"
+else
+  printf "Unable to push README to Docker Hub, response code: %s\n" "${code}"
+  exit 1
+fi
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..d302749
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = --verbose -ra
diff --git a/report/create_all_reports.py b/report/create_all_reports.py
deleted file mode 100755
index c452660..0000000
--- a/report/create_all_reports.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import csv
-
-HOME_DIR = '/home/reporter'
-SHARED_DATA_DIR = HOME_DIR + '/shared/'
-
-
-def main():
-    with open(SHARED_DATA_DIR + "artifacts/unique-agencies.csv") as agency_csv:
-        for row in sorted(csv.reader(agency_csv)):
-            bashCommand = HOME_DIR + \
-                "/report/generate_https_scan_report.py " + \
-                '"' + row[0] + '"'
-            os.system(bashCommand)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/report/generate_https_scan_report.py b/report/generate_https_scan_report.py
deleted file mode 100755
index 4b2a826..0000000
--- a/report/generate_https_scan_report.py
+++ /dev/null
@@ -1,884 +0,0 @@
-#!/usr/bin/env python3
-
-'''Create Cyber Hygiene HTTPS Report PDF.
-
-Usage:
-  generate_https_scan_report [options] "AGENCY"
-  generate_https_scan_report (-h | --help)
-  generate_https_scan_report --version
-
-Options:
-  -d --debug     Keep intermediate files for debugging.
-  -h --help      Show this screen.
-  --version      Show version.
-''' -# standard python libraries -import codecs -import csv -from datetime import datetime -import json -import os -import shutil -import subprocess -import sys -import tempfile - -# third-party libraries (install with pip) -from docopt import docopt -from mongo_db_from_config import db_from_config -import pystache - -# intra-project modules -import graphs - -# constants -HOME_DIR = '/home/reporter' -SHARED_DATA_DIR = HOME_DIR + '/shared/' -DB_CONFIG_FILE = '/run/secrets/scan_read_creds.yml' -HTTPS_RESULTS_CSV_FILE = 'pshtt-results.csv' -OCSP_EXCLUSION_CSV_FILE = SHARED_DATA_DIR + 'artifacts/ocsp-crl.csv' -# Do not include the orgs below (based on _id) in the Report -EXEMPT_ORGS = [] -MUSTACHE_FILE = 'https_scan_report.mustache' -REPORT_JSON = 'https_scan_report.json' -REPORT_PDF = 'https_scan_report.pdf' -REPORT_TEX = 'https_scan_report.tex' -ASSETS_DIR_SRC = '../assets' -ASSETS_DIR_DST = 'assets' -LATEX_ESCAPE_MAP = { - '$': '\\$', - '%': '\\%', - '&': '\\&', - '#': '\\#', - '_': '\\_', - '{': '\\{', - '}': '\\}', - '[': '{[}', - ']': '{]}', - "'": "{'}", - '\\': '\\textbackslash{}', - '~': '\\textasciitilde{}', - '<': '\\textless{}', - '>': '\\textgreater{}', - '^': '\\textasciicircum{}', - '`': '{}`', - '\n': '\\newline{}', -} -PDF_CAPTURE_JS = 'pdf_capture.js' - - -class ReportGenerator(object): - # initiate variables - def __init__(self, db, agency, debug=False): - self.__db = db - self.__agency = agency - self.__agency_id = None - self.__debug = debug - self.__generated_time = datetime.utcnow() - self.__results = dict() # reusable query results - self.__requests = None - self.__report_doc = {'scores': []} - self.__all_domains = [] - self.__base_domains = [] - self.__eligible_domains_count = 0 # second-level/base-domains - self.__eligible_subdomains_count = 0 - self.__all_eligible_domains_count = 0 # responsive base+subs - self.__https_compliance_list = [] - self.__non_https_compliance_list = [] - self.__ineligible_domains = [] - self.__domain_count = 0 - self.__base_domain_count = 0 - self.__subdomain_count = 0 - self.__domain_supports_https = 0 - self.__domain_supports_https_count = 0 - self.__domain_enforces_https_count = 0 # added - self.__domain_uses_strong_hsts_count = 0 - self.__domain_has_no_weak_crypto_count = 0 - self.__strictly_forces_count = 0 - self.__downgrades_count = 0 - self.__hsts_count = 0 - self.__hsts_preloaded_count = 0 - self.__hsts_preload_ready_count = 0 - self.__hsts_entire_domain_count = 0 - self.__https_bad_chain_count = 0 - self.__https_bad_hostname_count = 0 - self.__https_expired_cert_count = 0 - self.__bod_1801_count = 0 - self.__hsts_base_domain_preloaded_count = 0 - self.__hsts_low_max_age_count = 0 - # self.__report_oid = ObjectId() - - # Read in and parse the OCSP exclusion domains. - # - # We use a dict for ocsp_exclusions because we want to take - # advantage of the speed of the underlying hash map. (We only - # care if a domain is present as an exclusion or not.) - ocsp_exclusions = {} - with open(OCSP_EXCLUSION_CSV_FILE, newline='') as ocsp_file: - csvreader = csv.reader(ocsp_file) - ocsp_exclusions = {row[0]: None for row in csvreader} - - # Get list of all domains from the database. Use no_cursor_timeout - # to handle agencies with a large number of domains. - all_domains_cursor = self.__db.https_scan.find( - {"latest": True, "agency.name": agency}, no_cursor_timeout=True - ) - # We really shouldn't include OCSP excluded domains in the - # total count. 
We do want to score them, for informational - # purposes, but the scores will not impact compliance. - # Therefore I should really perform this query: - # self.__domain_count = self.__db.https_scan.count({ - # 'latest': True, - # 'agency.name': agency, - # 'domain': { - # '$nin': ocsp_exclusions.keys() - # } - # }) - # - # In reality this value is not used in the report at all, so - # it doesn't matter. - self.__domain_count = all_domains_cursor.count() - - # Get weak crypto data for this agency's domains from the - # sslyze-scan collection - # - # TODO: Consider using aggregation $lookup with uncorrelated - # subquery to fetch https_scan and sslyze_scan data in one - # query (MongoDB server 3.6 and later) - - sslyze_data_all_domains = dict() - for host in self.__db.sslyze_scan.find( - { - 'latest': True, - 'agency.name': agency, - 'scanned_port': 443 - }, { - '_id': 0, - 'domain': 1, - 'scanned_port': 1, - 'scanned_hostname': 1, - 'sslv2': 1, - 'sslv3': 1, - 'any_3des': 1, - 'any_rc4': 1, - 'is_symantec_cert': 1 - } - ): - current_host_dict = { - 'scanned_hostname': host['scanned_hostname'], - 'scanned_port': host['scanned_port'], - 'sslv2': host['sslv2'], - 'sslv3': host['sslv3'], - 'any_3des': host['any_3des'], - 'any_rc4': host['any_rc4'], - 'is_symantec_cert': host['is_symantec_cert'] - } - - if not sslyze_data_all_domains.get(host['domain']): - sslyze_data_all_domains[host['domain']] = [current_host_dict] - else: - sslyze_data_all_domains[host['domain']].append( - current_host_dict - ) - - def add_weak_crypto_data_to_domain(domain_doc, - sslyze_data_all_domains): - # Look for weak crypto data in sslyze_data_all_domains and - # add hosts with weak crypto to - # domain_doc['hosts_with_weak_crypto'] - domain_doc['domain_has_weak_crypto'] = False - domain_doc['hosts_with_weak_crypto'] = [] - domain_doc['domain_has_symantec_cert'] = False - - if sslyze_data_all_domains.get(domain_doc['domain']): - for host in sslyze_data_all_domains[domain_doc['domain']]: - if host['sslv2'] or host['sslv3'] or \ - host['any_3des'] or host['any_rc4']: - domain_doc['domain_has_weak_crypto'] = True - domain_doc['hosts_with_weak_crypto'].append(host) - if host['is_symantec_cert']: - domain_doc['domain_has_symantec_cert'] = True - return domain_doc - - for domain_doc in all_domains_cursor: - domain_doc = add_weak_crypto_data_to_domain( - domain_doc, - sslyze_data_all_domains - ) - domain_doc['ocsp_domain'] = domain_doc['domain'] in ocsp_exclusions - self.__all_domains.append(domain_doc) - if domain_doc['is_base_domain']: - domain_doc['subdomains'] = list(self.__db.https_scan.find({ - 'latest': True, - 'base_domain': domain_doc['base_domain'], - 'is_base_domain': False - }).sort([('domain', 1)])) - self.__subdomain_count += len(domain_doc['subdomains']) - for subdomain_doc in domain_doc['subdomains']: - subdomain_doc = add_weak_crypto_data_to_domain( - subdomain_doc, - sslyze_data_all_domains - ) - subdomain_doc['ocsp_domain'] = \ - subdomain_doc['domain'] in ocsp_exclusions - self.__base_domains.append(domain_doc) - self.__agency_id = domain_doc['agency']['id'] - - # We instantiated this cursor without a timeout, so we have to - # close it manually. - all_domains_cursor.close() - - # Get a count of the second-level domains an agency owns. - # - # Really I should exclude OCSP domains here, but this isn't - # necessary since OCSP domains should be individual hostnames - # and not second-level domains. 
- self.__base_domain_count = self.__db.https_scan.find({ - 'latest': True, - 'agency.name': agency, - 'is_base_domain': True - }).count() - - def __score_domain(self, domain): - score = { - 'domain': domain['domain'], - 'ocsp_domain': domain['ocsp_domain'], - 'subdomain_scores': list() - } - - if domain['live']: - score['live_bool'] = True - # OCSP domains aren't eligible - if not domain['ocsp_domain']: - if domain['is_base_domain']: - self.__eligible_domains_count += 1 - self.__all_eligible_domains_count += 1 - else: - self.__eligible_subdomains_count += 1 - self.__all_eligible_domains_count += 1 - else: - # TODO Determine if this is still needed - self.__ineligible_domains.append({ - 'domain': domain['domain'] - }) - else: - score['live_bool'] = False - if domain['is_base_domain']: - # only include non-live base domains in the ineligible - # domains list; otherwise lots of non-existent subs - # will show in the report - - # TODO Determine if this is still needed - self.__ineligible_domains.append({ - 'domain': domain['domain'] - }) - - # https_full_connection and https_client_auth_required - if domain['https_full_connection']: - score['https_full_connection_bool'] = True - else: - score['https_full_connection_bool'] = False - if domain['https_client_auth_required']: - score['https_client_auth_required_bool'] = True - else: - score['https_client_auth_required_bool'] = False - - # strictly_forces_https - if domain['strictly_forces_https']: - # score['strictly_forces_https'] = 'Yes' - score['strictly_forces_https_bool'] = True - if not domain['ocsp_domain']: - self.__strictly_forces_count += 1 - else: - # score['strictly_forces_https'] = 'No' - score['strictly_forces_https_bool'] = False - - # "Uses HTTPS", domains_supports_https - # - # Domain gets credit for supporting HTTPS as long as it's live - # and hsts_base_domain_preloaded is true - if domain['domain_supports_https'] or \ - (domain['live'] and domain['hsts_base_domain_preloaded']): - # score['domain_supports_https'] = 'Yes' - score['domain_supports_https_bool'] = True - if not domain['ocsp_domain']: - self.__domain_supports_https_count += 1 - else: - # score['domain_supports_https'] = 'No' - score['domain_supports_https_bool'] = False - - # "Enforces HTTPS", domain_enforces_https - # - # Domain gets credit for enforcing HTTPS as long as it's live - # and hsts_base_domain_preloaded is true - if domain['domain_enforces_https'] or \ - (domain['live'] and domain['hsts_base_domain_preloaded']): - # score['domain_enforces_https'] = 'Yes' - score['domain_enforces_https_bool'] = True - if not domain['ocsp_domain']: - self.__domain_enforces_https_count += 1 - else: - # score['domain_enforces_https'] = 'No' - score['domain_enforces_https_bool'] = False - - # https_bad_chain - if domain['https_bad_chain'] and domain['https_bad_hostname']: - score['https_bad_chain_bool'] = True - if not domain['ocsp_domain']: - self.__https_bad_chain_count += 1 - elif (domain['https_bad_chain'] and - not domain['https_bad_hostname']) or \ - (domain['https_bad_chain'] and domain['https_expired_cert']): - if not domain['ocsp_domain']: - self.__https_bad_chain_count += 1 - else: - score['https_bad_chain_bool'] = False - - # https_bad_hostname - if domain['https_bad_hostname']: - score['https_bad_hostname_bool'] = True - if not domain['ocsp_domain']: - self.__https_bad_hostname_count += 1 - else: - score['https_bad_hostname_bool'] = False - - # https_expired_cert - if domain['https_expired_cert']: - score['https_expired_cert_bool'] = True - if not 
domain['ocsp_domain']: - self.__https_expired_cert_count += 1 - else: - score['https_expired_cert_bool'] = False - - # redirect - if domain['redirect']: - score['redirect_bool'] = True - else: - score['redirect_bool'] = False - - # downgrades_https - if domain['downgrades_https']: - # score['downgrades_https'] = 'Yes' - score['downgrades_https_bool'] = True - if not domain['ocsp_domain']: - self.__downgrades_count += 1 - else: - # score['downgrades_https'] = 'No' - score['downgrades_https_bool'] = False - - # Is the domain's base_domain preloaded? - # In this case, we only care if the domain is live - if domain['live'] and domain['hsts_base_domain_preloaded']: - score['hsts_base_domain_preloaded_bool'] = True - if not domain['ocsp_domain']: - self.__hsts_base_domain_preloaded_count += 1 - else: - score['hsts_base_domain_preloaded'] = False - - # hsts_preloaded > hsts_preload_pending > hsts_preload_ready - if domain['hsts_preloaded']: - # score['hsts_preloaded'] = 'Yes' - score['hsts_preloaded_bool'] = True - if not domain['ocsp_domain']: - self.__hsts_preloaded_count += 1 - else: - score['hsts_preloaded_bool'] = False - # score['hsts_preloaded'] = 'No' - if domain['hsts_preload_pending']: - score['hsts_preload_pending_bool'] = True - else: - score['hsts_preload_pending_bool'] = False - - if domain['hsts_preload_ready']: - score['hsts_preload_ready_bool'] = True - # score['hsts_preload_ready'] = 'Yes' - if not domain['ocsp_domain']: - self.__hsts_preload_ready_count += 1 - else: - score['hsts_preload_ready_bool'] = False - # score['hsts_preload_ready'] = 'No' - - # Are the HSTS headers being served? - if domain['hsts']: - # score['hsts'] = 'Yes' - score['hsts_bool'] = True - - # HTTPS Strict Transport Security (HSTS): This is 'Yes' in - # the report only if HSTS is present and the max-age is >= - # 1 year, as BOD 18-01 requires - # - # Domain gets credit for strong HSTS as long as it's live - # and hsts_base_domain_preloaded is true - if domain['domain_uses_strong_hsts'] or \ - (domain['live'] and domain['hsts_base_domain_preloaded']): - score['domain_uses_strong_hsts_bool'] = True - if not domain['ocsp_domain']: - self.__domain_uses_strong_hsts_count += 1 - else: - score['domain_uses_strong_hsts_bool'] = False - if 0 < domain['hsts_max_age'] < 31536000: - if not domain['ocsp_domain']: - self.__hsts_low_max_age_count += 1 - elif domain['live'] and ( - domain['hsts_base_domain_preloaded'] or - (not domain['https_full_connection'] and - domain['https_client_auth_required']) - ): - # If HSTS is not present but the base_domain is preloaded, - # "HSTS" gets a thumbs up. In this case, we only care if - # the domain is live. - # - # If we can't make a full HTTPS connection because the - # domain requires client authentication, then we can't - # know if they serve HSTS headers or not. We have chosen - # to give them the benefit of the doubt. - score['domain_uses_strong_hsts_bool'] = True - if not domain['ocsp_domain']: - self.__domain_uses_strong_hsts_count += 1 - else: - # No HSTS - # score['hsts'] = 'No' - score['hsts_bool'] = False - score['hsts_preloaded_bool'] = False - score['hsts_preload_pending_bool'] = False - score['hsts_preload_ready_bool'] = False - score['domain_uses_strong_hsts_bool'] = False - - # Does the domain have weak crypto? 
- score['domain_has_weak_crypto_bool'] = domain['domain_has_weak_crypto'] - if domain['live'] and not domain['domain_has_weak_crypto']: - if not domain['ocsp_domain']: - self.__domain_has_no_weak_crypto_count += 1 - # Build list of weak crypto host info and save it in - # score['hosts_with_weak_crypto'] - score['hosts_with_weak_crypto'] = list() - for host in domain['hosts_with_weak_crypto']: - weak_crypto_list = list() - for (wc_key, wc_text) in [ - ('sslv2', 'SSLv2'), - ('sslv3', 'SSLv3'), - ('any_3des', '3DES'), - ('any_rc4', 'RC4') - ]: - if host[wc_key]: - weak_crypto_list.append(wc_text) - score['hosts_with_weak_crypto'].append({ - 'hostname': host['scanned_hostname'], - 'port': host['scanned_port'], - 'weak_crypto_list_str': ', '.join(weak_crypto_list) - }) - - # Does the domain have a Symantec cert? - # If so, they have to be replaced - see: - # https://www.symantec.com/connect/blogs/information-replacement-symantec-ssltls-certificates - score['domain_has_symantec_cert_bool'] = \ - domain['domain_has_symantec_cert'] - - # BOD 18-01 compliant? - if ( - (domain['domain_supports_https'] and - domain['domain_enforces_https'] and - domain['domain_uses_strong_hsts']) or - (domain['live'] and ( - domain['hsts_base_domain_preloaded'] or ( - (not domain['https_full_connection'] and - domain['https_client_auth_required']) - ) - )) - ) and not domain['domain_has_weak_crypto']: - score['bod_1801_compliance'] = True - if not domain['ocsp_domain']: - self.__bod_1801_count += 1 - else: - score['bod_1801_compliance'] = False - - if domain.get('subdomains'): # if this domain has any subdomains - for subdomain in domain['subdomains']: - subdomain_score = self.__score_domain(subdomain) - if subdomain_score['live_bool']: # Only add live - # subdomains add this subdomain's score to this - # domain's list of subdomain_scores - score['subdomain_scores'].append(subdomain_score) - return score - - def __populate_report_doc(self): - # index = 0 - # sort list of all domains - self.__all_domains.sort(key=lambda x: x['domain']) - # sort list of base domains - self.__base_domains.sort(key=lambda x: x['domain']) - - # Go through each base domain and score the attributes - for domain in self.__base_domains: - score = self.__score_domain(domain) - # Add domain's score to master list of scores - self.__report_doc['scores'].append(score) - - if not self.__all_eligible_domains_count: - # TODO Decide if we want to generate an empty report in this case - print('ERROR: "{}" has no live domains - exiting without generating report!'.format(self.__agency)) - sys.exit(-1) - - self.__uses_https_percentage = round( - self.__domain_supports_https_count / - self.__all_eligible_domains_count * 100.0, - 1 - ) - self.__enforces_https_percentage = round( - self.__domain_enforces_https_count / - self.__all_eligible_domains_count * 100.0, - 1 - ) - self.__hsts_percentage = round( - self.__domain_uses_strong_hsts_count / - self.__all_eligible_domains_count * 100.0, - 1 - ) - self.__has_no_weak_crypto_percentage = round( - self.__domain_has_no_weak_crypto_count / - self.__all_eligible_domains_count * 100, - 1 - ) - self.__bod_1801_percentage = round( - self.__bod_1801_count / - self.__all_eligible_domains_count * 100.0, - 1 - ) - - # self.__write_to_overview() # generates ARTIFACTS_DIR + - # "/reporting.csv" - is this still needed? 
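The __latex_escape helper that follows (and its counterpart in the rewritten module) performs table-driven escaping: each character is looked up in LATEX_ESCAPE_MAP and replaced with its LaTeX-safe form, while unmapped characters pass through unchanged. A minimal standalone sketch of the same approach, using only an abbreviated excerpt of the map:

    # Sketch of the table-driven LaTeX escaping used by __latex_escape below.
    # ESCAPE_MAP is an excerpt of the module's LATEX_ESCAPE_MAP, not the full table.
    ESCAPE_MAP = {
        "$": "\\$",
        "%": "\\%",
        "&": "\\&",
        "_": "\\_",
        "\n": "\\newline{}",
    }

    def latex_escape(text):
        """Replace each LaTeX-special character with its escaped form."""
        # A single pass over the string; each character is escaped at most once.
        return "".join(ESCAPE_MAP.get(ch, ch) for ch in text)

    print(latex_escape("R&D spend: 100% of $5M"))  # -> R\&D spend: 100\% of \$5M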
- - def __latex_escape(self, to_escape): - return ''.join([LATEX_ESCAPE_MAP.get(i, i) for i in to_escape]) - - def __latex_escape_structure(self, data): - '''assumes that all sequences contain dicts''' - if isinstance(data, dict): - for k, v in data.items(): - if k.endswith('_tex'): # skip special tex values - continue - if isinstance(v, str): - data[k] = self.__latex_escape(v) - else: - self.__latex_escape_structure(v) - elif isinstance(data, (list, tuple)): - for i in data: - self.__latex_escape_structure(i) - - def generate_https_scan_report(self): - print('\tParsing data') - # build up the report_doc from the query results - self.__populate_report_doc() - - # sort org lists - if self.__https_compliance_list: - self.__https_compliance_list.sort(key=lambda x: x['domain']) - if self.__non_https_compliance_list: - self.__non_https_compliance_list.sort(key=lambda x: x['domain']) - - # create a working directory - original_working_dir = os.getcwd() - if self.__debug: - temp_working_dir = tempfile.mkdtemp(dir=original_working_dir) - else: - temp_working_dir = tempfile.mkdtemp() - - # setup the working directory - self.__setup_work_directory(temp_working_dir) - os.chdir(temp_working_dir) - - print('\tGenerating attachments') - # generate attachments - self.__generate_attachments() - - print('\tGenerating charts') - # generate charts - self.__generate_charts() - - # generate json input to mustache - self.__generate_mustache_json(REPORT_JSON) - - # generate latex json + mustache - self.__generate_latex(MUSTACHE_FILE, REPORT_JSON, REPORT_TEX) - - print('\tAssembling PDF') - # generate report figures + latex - self.__generate_final_pdf() - - # revert working directory - os.chdir(original_working_dir) - - # copy report and json file to original working directory - # and delete working directory - if not self.__debug: - src_filename = os.path.join(temp_working_dir, REPORT_PDF) - datestamp = self.__generated_time.strftime('%Y-%m-%d') - dest_dir = "." 
- - if self.__agency_id is not None: - dest_filename = "{}/cyhy-{}-{}-https-report.pdf".format( - dest_dir, self.__agency_id, datestamp) - else: - dest_filename = "{}/cyhy-{}-{}-https-report.pdf".format( - dest_dir, self.__agency, datestamp) - - shutil.move(src_filename, dest_filename) - return self.__results - - def __setup_work_directory(self, work_dir): - me = os.path.realpath(__file__) - my_dir = os.path.dirname(me) - for n in (MUSTACHE_FILE, PDF_CAPTURE_JS): - file_src = os.path.join(my_dir, n) - file_dst = os.path.join(work_dir, n) - shutil.copyfile(file_src, file_dst) - # copy static assets - dir_src = os.path.join(my_dir, ASSETS_DIR_SRC) - dir_dst = os.path.join(work_dir, ASSETS_DIR_DST) - shutil.copytree(dir_src, dir_dst) - - ########################################################################### - # Attachment Generation - ########################################################################### - def __generate_attachments(self): - self.__generate_https_attachment() - - def __generate_https_attachment(self): - header_fields = ('Domain', 'Base Domain', 'Domain Is Base Domain', - 'Canonical URL', 'Live', 'Redirect', 'Redirect To', - 'Valid HTTPS', 'Defaults to HTTPS', - 'Downgrades HTTPS', 'Strictly Forces HTTPS', - 'HTTPS Bad Chain', 'HTTPS Bad Hostname', - 'HTTPS Expired Cert', 'HTTPS Self Signed Cert', - 'HSTS', 'HSTS Header', 'HSTS Max Age', - 'HSTS Entire Domain', 'HSTS Preload Ready', - 'HSTS Preload Pending', 'HSTS Preloaded', - 'Base Domain HSTS Preloaded', - 'Domain Supports HTTPS', 'Domain Enforces HTTPS', - 'Domain Uses Strong HSTS', - 'HTTPS Client Auth Required', - 'Domain Supports Weak Crypto', - 'Web Hosts With Weak Crypto', - 'Domain Uses Symantec Certificate', - 'OCSP Domain', 'Unknown Error') - data_fields = ('domain', 'base_domain', 'is_base_domain', - 'canonical_url', 'live', 'redirect', 'redirect_to', - 'valid_https', 'defaults_https', - 'downgrades_https', 'strictly_forces_https', - 'https_bad_chain', 'https_bad_hostname', - 'https_expired_cert', 'https_self_signed_cert', - 'hsts', 'hsts_header', 'hsts_max_age', - 'hsts_entire_domain', 'hsts_preload_ready', - 'hsts_preload_pending', 'hsts_preloaded', - 'hsts_base_domain_preloaded', - 'domain_supports_https', 'domain_enforces_https', - 'domain_uses_strong_hsts', - 'https_client_auth_required', - 'domain_has_weak_crypto', - 'hosts_with_weak_crypto_str', - 'domain_has_symantec_cert', - 'ocsp_domain', 'unknown_error') - with open(HTTPS_RESULTS_CSV_FILE, newline='', mode='w') as out_file: - header_writer = csv.DictWriter(out_file, header_fields, - extrasaction='ignore') - header_writer.writeheader() - data_writer = csv.DictWriter(out_file, data_fields, - extrasaction='ignore') - - def rehydrate_hosts_with_weak_crypto(d): - """Build a string suitable for output from the - dictionary that was retrieved from the database - - Parameters - ---------- - d : dict - The hosts_with_weak_crypto dictionary - - Returns - ------- - str: The string with weak crypto host details. 
- """ - hostname = d['scanned_hostname'] - port = d['scanned_port'] - - weak_crypto_list = list() - for (wc_key, wc_text) in [ - ('sslv2', 'SSLv2'), - ('sslv3', 'SSLv3'), - ('any_3des', '3DES'), - ('any_rc4', 'RC4') - ]: - if d[wc_key]: - weak_crypto_list.append(wc_text) - result = '{0}:{1} [supports: {2}]'.format( - hostname, port, ','.join(weak_crypto_list) - ) - - return result - - def format_list(record_list): - """Format a list into a string to increase readability - in CSV""" - # record_list should only be a list, not an integer, None, or - # anything else. Thus this if clause handles only empty lists. - # This makes a "null" appear in the JSON output for empty - # lists, as expected. - if not record_list: - return None - - return ', '.join(record_list) - - for domain in self.__all_domains: - hosts_with_weak_crypto = [ - rehydrate_hosts_with_weak_crypto(d) - for d in domain['hosts_with_weak_crypto'] - ] - domain['hosts_with_weak_crypto_str'] = format_list( - hosts_with_weak_crypto - ) - data_writer.writerow(domain) - - ########################################################################### - # Chart Generation - ########################################################################### - def __generate_charts(self): - graphs.setup() - self.__generate_bod_1801_components_bar_chart() - self.__generate_donut_charts() - - def __generate_bod_1801_components_bar_chart(self): - bod_1801_bar = graphs.MyTrustyBar( - percentage_list=[ - self.__uses_https_percentage, - self.__enforces_https_percentage, - self.__hsts_percentage, - self.__has_no_weak_crypto_percentage - ], - label_list=[ - 'Uses\nHTTPS', - 'Enforces\nHTTPS', - 'Uses Strong\nHSTS', - 'No SSLv2/v3,\n3DES,RC4' - ], - fill_color=graphs.DARK_BLUE, - title='BOD 18-01 HTTPS Components') - bod_1801_bar.plot(filename='bod-18-01-https-components') - - def __generate_donut_charts(self): - bod_1801_donut = graphs.MyDonutPie( - percentage_full=round(self.__bod_1801_percentage), - label='BOD 18-01\nCompliant\n(Web)', - fill_color=graphs.DARK_BLUE) - bod_1801_donut.plot(filename='bod-18-01-compliant') - - ########################################################################### - # Final Document Generation and Assembly - ########################################################################### - def __generate_mustache_json(self, filename): - # result = {'all_domains':self.__all_domains} - result = {'report_doc': self.__report_doc} - result['ineligible_domains'] = self.__ineligible_domains - result['domain_count'] = self.__domain_count - result['subdomain_count'] = self.__subdomain_count - result['base_domain_count'] = self.__base_domain_count - result['all_eligible_domains_count'] = \ - self.__all_eligible_domains_count - result['eligible_domains_count'] = self.__eligible_domains_count - result['eligible_subdomains_count'] = self.__eligible_subdomains_count - result['https_compliance_list'] = self.__https_compliance_list - result['non_https_compliance_list'] = self.__non_https_compliance_list - result['title_date_tex'] = \ - self.__generated_time.strftime('{%d}{%m}{%Y}') - result['agency'] = self.__agency - result['agency_id'] = self.__agency_id - result['strictly_forces_percentage'] = round( - self.__strictly_forces_count / self.__domain_count * 100.0, - 1 - ) - result['downgrades_percentage'] = round( - self.__downgrades_count / self.__domain_count * 100.0, - 1 - ) - result['hsts_percentage'] = self.__hsts_percentage - result['hsts_preloaded_percentage'] = round( - self.__hsts_preloaded_count / self.__domain_count * 100.0, - 1 - ) 
- result['hsts_entire_domain_percentage'] = round( - self.__hsts_entire_domain_count / self.__domain_count * 100.0, - 1 - ) - # result['strictly_forces_percentage'] = 0 - # result['downgrades_percentage'] = 0 - # result['hsts_preloaded_percentage'] = 0 - # result['hsts_entire_domain_percentage'] = 0 - result['domain_has_no_weak_crypto_count'] = \ - self.__domain_has_no_weak_crypto_count - result['has_no_weak_crypto_percentage'] = \ - self.__has_no_weak_crypto_percentage - result['bod_1801_percentage'] = self.__bod_1801_percentage - result['bod_1801_count'] = self.__bod_1801_count - result['domain_supports_https_count'] = \ - self.__domain_supports_https_count # added - result['uses_https_percentage'] = self.__uses_https_percentage - result['enforces_https_percentage'] = self.__enforces_https_percentage - result['strictly_forces_count'] = self.__strictly_forces_count - result['domain_enforces_https_count'] = \ - self.__domain_enforces_https_count - result['hsts_count'] = self.__hsts_count - result['hsts_preloaded_count'] = self.__hsts_preloaded_count - result['hsts_preload_ready_count'] = self.__hsts_preload_ready_count - result['domain_uses_strong_hsts_count'] = \ - self.__domain_uses_strong_hsts_count - result['https_expired_cert_count'] = self.__https_expired_cert_count - result['https_bad_hostname_count'] = self.__https_bad_hostname_count - result['https_bad_chain_count'] = self.__https_bad_chain_count - result['hsts_low_max_age_count'] = self.__hsts_low_max_age_count - - self.__latex_escape_structure(result['report_doc']) - - with open(filename, 'w') as out: - out.write(json.dumps(result)) - - def __generate_latex(self, mustache_file, json_file, latex_file): - template = codecs.open(mustache_file, 'r', encoding='utf-8').read() - - with codecs.open(json_file, 'r', encoding='utf-8') as data_file: - data = json.load(data_file) - - r = pystache.render(template, data) - with codecs.open(latex_file, 'w', encoding='utf-8') as output: - output.write(r) - - def __generate_final_pdf(self): - if self.__debug: - output = sys.stdout - else: - output = open(os.devnull, 'w') - - return_code = subprocess.call(['xelatex', REPORT_TEX], - stdout=output, - stderr=subprocess.STDOUT) - assert return_code == 0, \ - 'xelatex pass 1 of 2 return code was %s' % return_code - - return_code = subprocess.call(['xelatex', REPORT_TEX], - stdout=output, - stderr=subprocess.STDOUT) - assert return_code == 0, \ - 'xelatex pass 2 of 2 return code was %s' % return_code - - -def main(): - args = docopt(__doc__, version='v0.0.1') - db = db_from_config(DB_CONFIG_FILE) - - print("Generating HTTPS Report for {}...".format(args['"AGENCY"'])) - generator = ReportGenerator(db, args['"AGENCY"'], debug=args['--debug']) - generator.generate_https_scan_report() - print("Done") - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/report/pdf_capture.js b/report/pdf_capture.js deleted file mode 100644 index 8b5ad13..0000000 --- a/report/pdf_capture.js +++ /dev/null @@ -1,59 +0,0 @@ -var system = require('system'); -var page = require("webpage").create(); -var url, outfile, vp_width, vp_height; - -page.onError = function(msg, trace) { - var msgStack = ['ERROR: ' + msg]; - - if (trace && trace.length) { - msgStack.push('TRACE:'); - trace.forEach(function(t) { - msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? 
' (in function "' + t.function +'")' : '')); - }); - } - console.error(msgStack.join('\n')); -}; - -page.onConsoleMessage = function(msg, lineNum, sourceId) { - console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")'); -}; - -if (system.args.length != 5) { - console.log('Usage: pdf_capture.js URL filename window_width window_height'); - phantom.exit(1); -} else { - url = system.args[1]; - outfile = system.args[2]; - vp_width = parseInt(system.args[3]); - vp_height = parseInt(system.args[4]); - page.viewportSize = { width:vp_width, height:vp_height }; - page.paperSize = { width:vp_width+'px', height:vp_height+12+'px', margin: '0px' }; // Add 12 pixels of height to keep PDF on 1 page - - function onPageReady() { - page.evaluate(function () { - return document.documentElement.outerHTML; - }); - //console.log(htmlContent); - page.render(outfile); - console.log('Wrote output file: ' + outfile); - phantom.exit(); - } - - console.log('\nOpening page: ' + url); - page.open(url, function (status) { - function checkReadyState() { - setTimeout(function () { - var readyState = page.evaluate(function () { - return ((typeof cybex_chart4 !== 'undefined') && (cybex_chart4.data().length > 0)); - }); - - if (readyState) { - setTimeout(function () { onPageReady() }, 1000); // Wait for d3 transition to complete after data has been rec'd - } else { - checkReadyState(); - } - }, 200); - } - checkReadyState(); - }); -} diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..1d7e302 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +--requirement requirements-test.txt +ipython +mypy +semver diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..5f3337c --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,4 @@ +--requirement requirements.txt +pre-commit +pytest +pytest-dockerc diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0a8547b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +setuptools +wheel diff --git a/setup-env b/setup-env new file mode 100755 index 0000000..f526cdb --- /dev/null +++ b/setup-env @@ -0,0 +1,190 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +USAGE=$( + cat << 'END_OF_LINE' +Configure a development environment for this repository. + +It does the following: + - Verifies pyenv and pyenv-virtualenv are installed. + - Creates a Python virtual environment. + - Configures the activation of the virtual enviroment for the repo directory. + - Installs the requirements needed for development. + - Installs git pre-commit hooks. + - Configures git upstream remote "lineage" repositories. + +Usage: + setup-env [options] [virt_env_name] + setup-env (-h | --help) + +Options: + -f --force Delete virtual enviroment if it already exists. + -h --help Show this message. + -i --install-hooks Install hook environments for all environments in the + pre-commit config file. 
+ +END_OF_LINE +) + +# Flag to force deletion and creation of virtual environment +FORCE=0 + +# Positional parameters +PARAMS="" + +# Parse command line arguments +while (("$#")); do + case "$1" in + -f | --force) + FORCE=1 + shift + ;; + -h | --help) + echo "${USAGE}" + exit 0 + ;; + -i | --install-hooks) + INSTALL_HOOKS=1 + shift + ;; + -*) # unsupported flags + echo "Error: Unsupported flag $1" >&2 + exit 1 + ;; + *) # preserve positional arguments + PARAMS="$PARAMS $1" + shift + ;; + esac +done + +# Set positional arguments in their proper place +eval set -- "$PARAMS" + +# Check to see if pyenv is installed +if [ -z "$(command -v pyenv)" ] || [ -z "$(command -v pyenv-virtualenv)" ]; then + echo "pyenv and pyenv-virtualenv are required." + if [[ "$OSTYPE" == "darwin"* ]]; then + cat << 'END_OF_LINE' + + On the Mac, we recommend installing brew, https://brew.sh/. Then installation + is as simple as `brew install pyenv pyenv-virtualenv` and adding this to your + profile: + + eval "$(pyenv init -)" + eval "$(pyenv virtualenv-init -)" + +END_OF_LINE + + fi + cat << 'END_OF_LINE' + For Linux, Windows Subsystem for Linux (WSL), or on the Mac (if you don't want + to use "brew") you can use https://github.com/pyenv/pyenv-installer to install + the necessary tools. Before running this ensure that you have installed the + prerequisites for your platform according to the pyenv wiki page, + https://github.com/pyenv/pyenv/wiki/common-build-problems. + + On WSL you should treat your platform as whatever Linux distribution you've + chosen to install. + + Once you have installed "pyenv" you will need to add the following lines to + your ".bashrc": + + export PATH="$PATH:$HOME/.pyenv/bin" + eval "$(pyenv init -)" + eval "$(pyenv virtualenv-init -)" +END_OF_LINE + exit 1 +fi + +set +o nounset +# Determine the virtual environment name +if [ "$1" ]; then + # Use the user-provided environment name + env_name=$1 +else + # Set the environment name to the last part of the working directory. + env_name=${PWD##*/} +fi +set -o nounset + +# Remove any lingering local configuration. +if [ $FORCE -ne 0 ]; then + rm -f .python-version + pyenv virtualenv-delete --force "${env_name}" || true +elif [[ -f .python-version ]]; then + cat << 'END_OF_LINE' + An existing .python-version file was found. Either remove this file yourself + or re-run with the --force option to have it deleted along with the associated + virtual environment. + + rm .python-version + +END_OF_LINE + exit 1 +fi + +# Create a new virtual environment for this project +if ! pyenv virtualenv "${env_name}"; then + cat << END_OF_LINE + An existing virtual environment named $env_name was found. Either delete this + environment yourself or re-run with the --force option to have it deleted. + + pyenv virtualenv-delete ${env_name} + +END_OF_LINE + exit 1 +fi + +# Set the local application-specific Python version(s) by writing the +# version name to a file named `.python-version'. +pyenv local "${env_name}" + +# Upgrade pip and friends +python3 -m pip install --upgrade pip setuptools wheel + +# Find a requirements file (if possible) and install +for req_file in "requirements-dev.txt" "requirements-test.txt" "requirements.txt"; do + if [[ -f $req_file ]]; then + pip install --requirement $req_file + break + fi +done + +# Install git pre-commit hooks now or later. +pre-commit install ${INSTALL_HOOKS:+"--install-hooks"} + +# Set up git remotes from the lineage configuration. +# This could fail if the remotes are already set up, but that is ok. 
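The final step translates .github/lineage.yml into git commands: the embedded Python that follows prints git remote add and git remote set-url --push lines to stdout, and the surrounding eval "$(...)" executes them in the shell. A standalone sketch of that translation, assuming a version-1 config of the expected shape (the parent name and URL here are illustrative assumptions, not taken from any real configuration):

    # Sketch of the lineage-to-git-commands translation performed by the
    # embedded script below.
    import textwrap

    import yaml

    sample_config = textwrap.dedent(
        """
        lineage:
          upstream-skeleton:
            remote-url: https://github.com/example/skeleton.git
        version: '1'
        """
    )

    lineage = yaml.safe_load(sample_config)
    if lineage["version"] == "1":
        for parent_name, v in lineage["lineage"].items():
            # Each parent becomes a fetch-only remote; pushes are disabled by
            # pointing the push URL at the bogus target "no_push".
            print(f"git remote add {parent_name} {v['remote-url']};")
            print(f"git remote set-url --push {parent_name} no_push;")
    # Output:
    # git remote add upstream-skeleton https://github.com/example/skeleton.git;
    # git remote set-url --push upstream-skeleton no_push;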
+set +o errexit + +eval "$( + python3 << 'END_OF_LINE' +from pathlib import Path +import yaml +import sys + +LINEAGE_CONFIG = Path(".github/lineage.yml") + +if not LINEAGE_CONFIG.exists(): + print("No lineage configuration found.", file=sys.stderr) + sys.exit(0) + +with LINEAGE_CONFIG.open("r") as f: + lineage = yaml.safe_load(stream=f) + +if lineage["version"] == "1": + for parent_name, v in lineage["lineage"].items(): + remote_url = v["remote-url"] + print(f"git remote add {parent_name} {remote_url};") + print(f"git remote set-url --push {parent_name} no_push;") +else: + print(f'Unsupported lineage version: {lineage["version"]}', file=sys.stderr) +END_OF_LINE +)" + +# Qapla +echo "Success!" diff --git a/assets/assessment-summary-title.pdf b/src/assets/assessment-summary-title.pdf similarity index 100% rename from assets/assessment-summary-title.pdf rename to src/assets/assessment-summary-title.pdf diff --git a/assets/cisa-logo.png b/src/assets/cisa-logo.png similarity index 100% rename from assets/cisa-logo.png rename to src/assets/cisa-logo.png diff --git a/assets/dhs-logo.pdf b/src/assets/dhs-logo.pdf similarity index 100% rename from assets/dhs-logo.pdf rename to src/assets/dhs-logo.pdf diff --git a/assets/placeholder.pdf b/src/assets/placeholder.pdf similarity index 100% rename from assets/placeholder.pdf rename to src/assets/placeholder.pdf diff --git a/fonts/Arial Bold Italic.ttf b/src/fonts/Arial Bold Italic.ttf similarity index 100% rename from fonts/Arial Bold Italic.ttf rename to src/fonts/Arial Bold Italic.ttf diff --git a/fonts/Arial Bold.ttf b/src/fonts/Arial Bold.ttf similarity index 100% rename from fonts/Arial Bold.ttf rename to src/fonts/Arial Bold.ttf diff --git a/fonts/Arial Italic.ttf b/src/fonts/Arial Italic.ttf similarity index 100% rename from fonts/Arial Italic.ttf rename to src/fonts/Arial Italic.ttf diff --git a/fonts/Arial.ttf b/src/fonts/Arial.ttf similarity index 100% rename from fonts/Arial.ttf rename to src/fonts/Arial.ttf diff --git a/fonts/Franklin Gothic Book Italic.ttf b/src/fonts/Franklin Gothic Book Italic.ttf similarity index 100% rename from fonts/Franklin Gothic Book Italic.ttf rename to src/fonts/Franklin Gothic Book Italic.ttf diff --git a/fonts/Franklin Gothic Book.ttf b/src/fonts/Franklin Gothic Book.ttf similarity index 100% rename from fonts/Franklin Gothic Book.ttf rename to src/fonts/Franklin Gothic Book.ttf diff --git a/fonts/Franklin Gothic Demi Regular.ttf b/src/fonts/Franklin Gothic Demi Regular.ttf similarity index 100% rename from fonts/Franklin Gothic Demi Regular.ttf rename to src/fonts/Franklin Gothic Demi Regular.ttf diff --git a/fonts/Franklin Gothic Medium Italic.ttf b/src/fonts/Franklin Gothic Medium Italic.ttf similarity index 100% rename from fonts/Franklin Gothic Medium Italic.ttf rename to src/fonts/Franklin Gothic Medium Italic.ttf diff --git a/fonts/Franklin Gothic Medium Regular.ttf b/src/fonts/Franklin Gothic Medium Regular.ttf similarity index 100% rename from fonts/Franklin Gothic Medium Regular.ttf rename to src/fonts/Franklin Gothic Medium Regular.ttf diff --git a/fonts/SourceCodePro-Black.otf b/src/fonts/SourceCodePro-Black.otf old mode 100755 new mode 100644 similarity index 100% rename from fonts/SourceCodePro-Black.otf rename to src/fonts/SourceCodePro-Black.otf diff --git a/fonts/SourceCodePro-Bold.otf b/src/fonts/SourceCodePro-Bold.otf old mode 100755 new mode 100644 similarity index 100% rename from fonts/SourceCodePro-Bold.otf rename to src/fonts/SourceCodePro-Bold.otf diff --git 
a/fonts/SourceCodePro-ExtraLight.otf b/src/fonts/SourceCodePro-ExtraLight.otf old mode 100755 new mode 100644 similarity index 100% rename from fonts/SourceCodePro-ExtraLight.otf rename to src/fonts/SourceCodePro-ExtraLight.otf diff --git a/fonts/SourceCodePro-Light.otf b/src/fonts/SourceCodePro-Light.otf old mode 100755 new mode 100644 similarity index 100% rename from fonts/SourceCodePro-Light.otf rename to src/fonts/SourceCodePro-Light.otf diff --git a/fonts/SourceCodePro-Regular.otf b/src/fonts/SourceCodePro-Regular.otf old mode 100755 new mode 100644 similarity index 100% rename from fonts/SourceCodePro-Regular.otf rename to src/fonts/SourceCodePro-Regular.otf diff --git a/fonts/SourceCodePro-Semibold.otf b/src/fonts/SourceCodePro-Semibold.otf old mode 100755 new mode 100644 similarity index 100% rename from fonts/SourceCodePro-Semibold.otf rename to src/fonts/SourceCodePro-Semibold.otf diff --git a/report.sh b/src/report.sh similarity index 70% rename from report.sh rename to src/report.sh index 1d25002..1fadb4f 100755 --- a/report.sh +++ b/src/report.sh @@ -1,6 +1,6 @@ #!/bin/bash -HOME_DIR='/home/reporter' +HOME_DIR='/home/cisa' SHARED_DIR=$HOME_DIR'/shared' # Prepare fonts @@ -9,11 +9,10 @@ cp ./fonts/* /usr/share/fonts/truetype/ fc-cache -f echo 'Waiting for saver' -while [ "$(redis-cli -h redis get saving_complete)" != "true" ] -do - sleep 5 +while [ "$(redis-cli -h redis get saving_complete)" != "true" ]; do + sleep 5 done -echo "Saver finished" +echo Saver finished # Don't delete saving_complete here since trustymail_reporter may be # using it too. @@ -21,7 +20,7 @@ echo "Saver finished" # Because HHS/NASA reports are large, we need to increase buffer size (LaTeX) sed -i 's/buf_size = 200000/buf_size = 1000000/' /usr/share/texmf/web2c/texmf.cnf -echo "Creating reporting folders..." +echo Creating reporting folders... mkdir -p $SHARED_DIR/artifacts/reporting/pshtt_reports # Grab OCSP/CRL hosts. These hosts are to be removed from the list of @@ -29,29 +28,28 @@ mkdir -p $SHARED_DIR/artifacts/reporting/pshtt_reports # required to satisfy BOD 18-01. For more information see here: # https://https.cio.gov/guide/#are-federally-operated-certificate-revocation-services-crl-ocsp-also-required-to-move-to-https wget https://raw.githubusercontent.com/GSA/data/master/dotgov-websites/ocsp-crl.csv \ - -O $SHARED_DIR/artifacts/ocsp-crl.csv + -O $SHARED_DIR/artifacts/ocsp-crl.csv # Generate agency reports -cd $SHARED_DIR/artifacts/reporting/pshtt_reports +cd $SHARED_DIR/artifacts/reporting/pshtt_reports || exit 1 $HOME_DIR/report/create_all_reports.py # Wait for the trustworthy email reporting to finish -echo 'Waiting for trustworthy email reporting' -while [ "$(redis-cli -h redis get trustymail_reporting_complete)" != "true" ] -do - sleep 5 +echo Waiting for trustworthy email reporting +while [ "$(redis-cli -h redis get trustymail_reporting_complete)" != "true" ]; do + sleep 5 done -echo "Trustworthy email reporting finished" +echo Trustworthy email reporting finished # Archive artifacts folder -echo 'Archiving Results...' +echo Archiving Results... 
mkdir -p $SHARED_DIR/archive/ TODAY=$(date +'%Y-%m-%d') -mv $SHARED_DIR/artifacts $SHARED_DIR/artifacts_$TODAY -tar czf $SHARED_DIR/archive/artifacts_$TODAY.tar.gz -C $SHARED_DIR artifacts_$TODAY/ +mv $SHARED_DIR/artifacts $SHARED_DIR/artifacts_"$TODAY" +tar czf $SHARED_DIR/archive/artifacts_"$TODAY".tar.gz -C $SHARED_DIR artifacts_"$TODAY"/ # Save the artifacts directory as latest rm -rf $SHARED_DIR/archive/latest -mv $SHARED_DIR/artifacts_$TODAY $SHARED_DIR/archive/latest +mv $SHARED_DIR/artifacts_"$TODAY" $SHARED_DIR/archive/latest # No longer needed redis-cli -h redis del saving_complete trustymail_reporting_complete diff --git a/src/report/__init__.py b/src/report/__init__.py new file mode 100644 index 0000000..21c60e2 --- /dev/null +++ b/src/report/__init__.py @@ -0,0 +1 @@ +"""Create PSHTT reports.""" diff --git a/src/report/create_all_reports.py b/src/report/create_all_reports.py new file mode 100755 index 0000000..ffe8122 --- /dev/null +++ b/src/report/create_all_reports.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +"""Generate all reports.""" + +# Standard Python Libraries +import csv + +# generate_https_scan_report.py isn't written in a way that easily +# allows it to be run in any other way, but Bandit doesn't like us to +# use subprocess. Hence the nosec. +import subprocess # nosec B404 + +HOME_DIR = "/home/cisa" +SHARED_DATA_DIR = f"{HOME_DIR}/shared" + + +def main(): + """Generate all reports.""" + with open(f"{SHARED_DATA_DIR}/artifacts/unique-agencies.csv") as agency_csv: + for row in sorted(csv.reader(agency_csv)): + bashCommand = [f"{HOME_DIR}/report/generate_https_scan_report.py", row[0]] + # generate_https_scan_report.py isn't written in a way + # that easily allows it to be run in any other way, but + # Bandit doesn't like us to use subprocess. Hence the + # nosec. + subprocess.run(bashCommand) # nosec B404 + + +if __name__ == "__main__": + main() diff --git a/src/report/generate_https_scan_report.py b/src/report/generate_https_scan_report.py new file mode 100755 index 0000000..68f9fec --- /dev/null +++ b/src/report/generate_https_scan_report.py @@ -0,0 +1,928 @@ +#!/usr/bin/env python3 + +"""Create Cyber Hygiene HTTPS Report PDF. + +Usage: + generate_https_scan_report [options] AGENCY + generate_https_scan_report (-h | --help) + generate_https_scan_report --version + +Options: + -d --debug Keep intermediate files for debugging. + -h --help Show this screen. + --version Show version. +""" +# Standard Python Libraries +import codecs +import csv +from datetime import datetime +import json +import os +import shutil + +# The subprocess module is frowned upon by Bandit, but we need it +# here. 
+import subprocess # nosec B404 +import sys +import tempfile + +# Third-Party Libraries +import chevron +from docopt import docopt + +# intra-project modules +import graphs +from mongo_db_from_config import db_from_config + +# constants +HOME_DIR = "/home/cisa" +SHARED_DATA_DIR = f"{HOME_DIR}/shared" +DB_CONFIG_FILE = "/run/secrets/scan_read_creds.yml" +HTTPS_RESULTS_CSV_FILE = "pshtt-results.csv" +OCSP_EXCLUSION_CSV_FILE = f"{SHARED_DATA_DIR}/artifacts/ocsp-crl.csv" +# Do not include the orgs below (based on _id) in the Report +EXEMPT_ORGS: list[str] = [] +MUSTACHE_FILE = "https_scan_report.mustache" +REPORT_JSON = "https_scan_report.json" +REPORT_PDF = "https_scan_report.pdf" +REPORT_TEX = "https_scan_report.tex" +ASSETS_DIR_SRC = "../assets" +ASSETS_DIR_DST = "assets" +LATEX_ESCAPE_MAP = { + "$": "\\$", + "%": "\\%", + "&": "\\&", + "#": "\\#", + "_": "\\_", + "{": "\\{", + "}": "\\}", + "[": "{[}", + "]": "{]}", + "'": "{'}", + "\\": "\\textbackslash{}", + "~": "\\textasciitilde{}", + "<": "\\textless{}", + ">": "\\textgreater{}", + "^": "\\textasciicircum{}", + "`": "{}`", + "\n": "\\newline{}", +} +PDF_CAPTURE_JS = "pdf_capture.js" + + +class ReportGenerator: + """Class for generating a PSHTT report.""" + + def __init__(self, db, agency, debug=False): + """Initialize the instance.""" + self.__db = db + self.__agency = agency + self.__agency_id = None + self.__debug = debug + self.__generated_time = datetime.utcnow() + self.__results = dict() # reusable query results + self.__requests = None + self.__report_doc = {"scores": []} + self.__all_domains = [] + self.__base_domains = [] + self.__eligible_domains_count = 0 # second-level/base-domains + self.__eligible_subdomains_count = 0 + self.__all_eligible_domains_count = 0 # responsive base+subs + self.__https_compliance_list = [] + self.__non_https_compliance_list = [] + self.__ineligible_domains = [] + self.__domain_count = 0 + self.__base_domain_count = 0 + self.__subdomain_count = 0 + self.__domain_supports_https = 0 + self.__domain_supports_https_count = 0 + self.__domain_enforces_https_count = 0 # added + self.__domain_uses_strong_hsts_count = 0 + self.__domain_has_no_weak_crypto_count = 0 + self.__strictly_forces_count = 0 + self.__downgrades_count = 0 + self.__hsts_count = 0 + self.__hsts_preloaded_count = 0 + self.__hsts_preload_ready_count = 0 + self.__hsts_entire_domain_count = 0 + self.__https_bad_chain_count = 0 + self.__https_bad_hostname_count = 0 + self.__https_expired_cert_count = 0 + self.__bod_1801_count = 0 + self.__hsts_base_domain_preloaded_count = 0 + self.__hsts_low_max_age_count = 0 + # self.__report_oid = ObjectId() + + # Read in and parse the OCSP exclusion domains. + # + # We use a dict for ocsp_exclusions because we want to take + # advantage of the speed of the underlying hash map. (We only + # care if a domain is present as an exclusion or not.) + ocsp_exclusions = {} + with open(OCSP_EXCLUSION_CSV_FILE, newline="") as ocsp_file: + csvreader = csv.reader(ocsp_file) + ocsp_exclusions = {row[0]: None for row in csvreader} + + # Get list of all domains from the database. Use no_cursor_timeout + # to handle agencies with a large number of domains. + all_domains_cursor = self.__db.https_scan.find( + {"latest": True, "agency.name": agency}, no_cursor_timeout=True + ) + all_domains_count = self.__db.https_scan.count_documents( + {"latest": True, "agency.name": agency} + ) + # We really shouldn't include OCSP excluded domains in the + # total count. 
We do want to score them, for informational + # purposes, but the scores will not impact compliance. + # Therefore I should really perform this query: + # self.__domain_count = self.__db.https_scan.count_documents({ + # 'latest': True, + # 'agency.name': agency, + # 'domain': { + # '$nin': ocsp_exclusions.keys() + # } + # }) + # + # In reality this value is not used in the report at all, so + # it doesn't matter. + self.__domain_count = all_domains_count + + # Get weak crypto data for this agency's domains from the + # sslyze-scan collection + # + # TODO: Consider using aggregation $lookup with uncorrelated + # subquery to fetch https_scan and sslyze_scan data in one + # query (MongoDB server 3.6 and later) + + sslyze_data_all_domains = dict() + for host in self.__db.sslyze_scan.find( + {"latest": True, "agency.name": agency, "scanned_port": 443}, + { + "_id": 0, + "domain": 1, + "scanned_port": 1, + "scanned_hostname": 1, + "sslv2": 1, + "sslv3": 1, + "any_3des": 1, + "any_rc4": 1, + "is_symantec_cert": 1, + }, + ): + current_host_dict = { + "scanned_hostname": host["scanned_hostname"], + "scanned_port": host["scanned_port"], + "sslv2": host["sslv2"], + "sslv3": host["sslv3"], + "any_3des": host["any_3des"], + "any_rc4": host["any_rc4"], + "is_symantec_cert": host["is_symantec_cert"], + } + + if not sslyze_data_all_domains.get(host["domain"]): + sslyze_data_all_domains[host["domain"]] = [current_host_dict] + else: + sslyze_data_all_domains[host["domain"]].append(current_host_dict) + + def add_weak_crypto_data_to_domain(domain_doc, sslyze_data_all_domains): + # Look for weak crypto data in sslyze_data_all_domains and + # add hosts with weak crypto to + # domain_doc['hosts_with_weak_crypto'] + domain_doc["domain_has_weak_crypto"] = False + domain_doc["hosts_with_weak_crypto"] = [] + domain_doc["domain_has_symantec_cert"] = False + + if sslyze_data_all_domains.get(domain_doc["domain"]): + for host in sslyze_data_all_domains[domain_doc["domain"]]: + if ( + host["sslv2"] + or host["sslv3"] + or host["any_3des"] + or host["any_rc4"] + ): + domain_doc["domain_has_weak_crypto"] = True + domain_doc["hosts_with_weak_crypto"].append(host) + if host["is_symantec_cert"]: + domain_doc["domain_has_symantec_cert"] = True + return domain_doc + + for domain_doc in all_domains_cursor: + domain_doc = add_weak_crypto_data_to_domain( + domain_doc, sslyze_data_all_domains + ) + domain_doc["ocsp_domain"] = domain_doc["domain"] in ocsp_exclusions + self.__all_domains.append(domain_doc) + if domain_doc["is_base_domain"]: + domain_doc["subdomains"] = list( + self.__db.https_scan.find( + { + "latest": True, + "base_domain": domain_doc["base_domain"], + "is_base_domain": False, + } + ).sort([("domain", 1)]) + ) + self.__subdomain_count += len(domain_doc["subdomains"]) + for subdomain_doc in domain_doc["subdomains"]: + subdomain_doc = add_weak_crypto_data_to_domain( + subdomain_doc, sslyze_data_all_domains + ) + subdomain_doc["ocsp_domain"] = ( + subdomain_doc["domain"] in ocsp_exclusions + ) + self.__base_domains.append(domain_doc) + self.__agency_id = domain_doc["agency"]["id"] + + # We instantiated this cursor without a timeout, so we have to + # close it manually. + all_domains_cursor.close() + + # Get a count of the second-level domains an agency owns. + # + # Really I should exclude OCSP domains here, but this isn't + # necessary since OCSP domains should be individual hostnames + # and not second-level domains. 
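Both comments in this hunk note that OCSP-excluded domains ideally would not be counted. For reference, here is a runnable sketch of the exclusion-aware count the first comment proposes, assuming the same collection layout (the helper name is mine, not code from this module). One wrinkle the comment glosses over: PyMongo cannot encode a dict_keys view, so the exclusion keys must be materialized into a list for the $nin operator.

    # Exclusion-aware domain count proposed in the comment above (a sketch).
    # ocsp_exclusions is the {domain: None} dict built earlier; list()
    # materializes its keys so BSON can encode the $nin operand.
    def count_non_ocsp_domains(db, agency, ocsp_exclusions):
        """Count an agency's latest-scan domains, excluding OCSP/CRL hosts."""
        return db.https_scan.count_documents(
            {
                "latest": True,
                "agency.name": agency,
                "domain": {"$nin": list(ocsp_exclusions)},
            }
        )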
+ self.__base_domain_count = self.__db.https_scan.count_documents( + {"latest": True, "agency.name": agency, "is_base_domain": True} + ) + + def __score_domain(self, domain): + score = { + "domain": domain["domain"], + "ocsp_domain": domain["ocsp_domain"], + "subdomain_scores": list(), + } + + if domain["live"]: + score["live_bool"] = True + # OCSP domains aren't eligible + if not domain["ocsp_domain"]: + if domain["is_base_domain"]: + self.__eligible_domains_count += 1 + self.__all_eligible_domains_count += 1 + else: + self.__eligible_subdomains_count += 1 + self.__all_eligible_domains_count += 1 + else: + # TODO Determine if this is still needed + self.__ineligible_domains.append({"domain": domain["domain"]}) + else: + score["live_bool"] = False + if domain["is_base_domain"]: + # only include non-live base domains in the ineligible + # domains list; otherwise lots of non-existent subs + # will show in the report + + # TODO Determine if this is still needed + self.__ineligible_domains.append({"domain": domain["domain"]}) + + # https_full_connection and https_client_auth_required + if domain["https_full_connection"]: + score["https_full_connection_bool"] = True + else: + score["https_full_connection_bool"] = False + if domain["https_client_auth_required"]: + score["https_client_auth_required_bool"] = True + else: + score["https_client_auth_required_bool"] = False + + # strictly_forces_https + if domain["strictly_forces_https"]: + # score['strictly_forces_https'] = 'Yes' + score["strictly_forces_https_bool"] = True + if not domain["ocsp_domain"]: + self.__strictly_forces_count += 1 + else: + # score['strictly_forces_https'] = 'No' + score["strictly_forces_https_bool"] = False + + # "Uses HTTPS", domains_supports_https + # + # Domain gets credit for supporting HTTPS as long as it's live + # and hsts_base_domain_preloaded is true + if domain["domain_supports_https"] or ( + domain["live"] and domain["hsts_base_domain_preloaded"] + ): + # score['domain_supports_https'] = 'Yes' + score["domain_supports_https_bool"] = True + if not domain["ocsp_domain"]: + self.__domain_supports_https_count += 1 + else: + # score['domain_supports_https'] = 'No' + score["domain_supports_https_bool"] = False + + # "Enforces HTTPS", domain_enforces_https + # + # Domain gets credit for enforcing HTTPS as long as it's live + # and hsts_base_domain_preloaded is true + if domain["domain_enforces_https"] or ( + domain["live"] and domain["hsts_base_domain_preloaded"] + ): + # score['domain_enforces_https'] = 'Yes' + score["domain_enforces_https_bool"] = True + if not domain["ocsp_domain"]: + self.__domain_enforces_https_count += 1 + else: + # score['domain_enforces_https'] = 'No' + score["domain_enforces_https_bool"] = False + + # https_bad_chain + if domain["https_bad_chain"] and domain["https_bad_hostname"]: + score["https_bad_chain_bool"] = True + if not domain["ocsp_domain"]: + self.__https_bad_chain_count += 1 + elif (domain["https_bad_chain"] and not domain["https_bad_hostname"]) or ( + domain["https_bad_chain"] and domain["https_expired_cert"] + ): + if not domain["ocsp_domain"]: + self.__https_bad_chain_count += 1 + else: + score["https_bad_chain_bool"] = False + + # https_bad_hostname + if domain["https_bad_hostname"]: + score["https_bad_hostname_bool"] = True + if not domain["ocsp_domain"]: + self.__https_bad_hostname_count += 1 + else: + score["https_bad_hostname_bool"] = False + + # https_expired_cert + if domain["https_expired_cert"]: + score["https_expired_cert_bool"] = True + if not domain["ocsp_domain"]: + 
self.__https_expired_cert_count += 1 + else: + score["https_expired_cert_bool"] = False + + # redirect + if domain["redirect"]: + score["redirect_bool"] = True + else: + score["redirect_bool"] = False + + # downgrades_https + if domain["downgrades_https"]: + # score['downgrades_https'] = 'Yes' + score["downgrades_https_bool"] = True + if not domain["ocsp_domain"]: + self.__downgrades_count += 1 + else: + # score['downgrades_https'] = 'No' + score["downgrades_https_bool"] = False + + # Is the domain's base_domain preloaded? + # In this case, we only care if the domain is live + if domain["live"] and domain["hsts_base_domain_preloaded"]: + score["hsts_base_domain_preloaded_bool"] = True + if not domain["ocsp_domain"]: + self.__hsts_base_domain_preloaded_count += 1 + else: + score["hsts_base_domain_preloaded"] = False + + # hsts_preloaded > hsts_preload_pending > hsts_preload_ready + if domain["hsts_preloaded"]: + # score['hsts_preloaded'] = 'Yes' + score["hsts_preloaded_bool"] = True + if not domain["ocsp_domain"]: + self.__hsts_preloaded_count += 1 + else: + score["hsts_preloaded_bool"] = False + # score['hsts_preloaded'] = 'No' + if domain["hsts_preload_pending"]: + score["hsts_preload_pending_bool"] = True + else: + score["hsts_preload_pending_bool"] = False + + if domain["hsts_preload_ready"]: + score["hsts_preload_ready_bool"] = True + # score['hsts_preload_ready'] = 'Yes' + if not domain["ocsp_domain"]: + self.__hsts_preload_ready_count += 1 + else: + score["hsts_preload_ready_bool"] = False + # score['hsts_preload_ready'] = 'No' + + # Are the HSTS headers being served? + if domain["hsts"]: + # score['hsts'] = 'Yes' + score["hsts_bool"] = True + + # HTTPS Strict Transport Security (HSTS): This is 'Yes' in + # the report only if HSTS is present and the max-age is >= + # 1 year, as BOD 18-01 requires + # + # Domain gets credit for strong HSTS as long as it's live + # and hsts_base_domain_preloaded is true + if domain["domain_uses_strong_hsts"] or ( + domain["live"] and domain["hsts_base_domain_preloaded"] + ): + score["domain_uses_strong_hsts_bool"] = True + if not domain["ocsp_domain"]: + self.__domain_uses_strong_hsts_count += 1 + else: + score["domain_uses_strong_hsts_bool"] = False + if 0 < domain["hsts_max_age"] < 31536000: + if not domain["ocsp_domain"]: + self.__hsts_low_max_age_count += 1 + elif domain["live"] and ( + domain["hsts_base_domain_preloaded"] + or ( + not domain["https_full_connection"] + and domain["https_client_auth_required"] + ) + ): + # If HSTS is not present but the base_domain is preloaded, + # "HSTS" gets a thumbs up. In this case, we only care if + # the domain is live. + # + # If we can't make a full HTTPS connection because the + # domain requires client authentication, then we can't + # know if they serve HSTS headers or not. We have chosen + # to give them the benefit of the doubt. + score["domain_uses_strong_hsts_bool"] = True + if not domain["ocsp_domain"]: + self.__domain_uses_strong_hsts_count += 1 + else: + # No HSTS + # score['hsts'] = 'No' + score["hsts_bool"] = False + score["hsts_preloaded_bool"] = False + score["hsts_preload_pending_bool"] = False + score["hsts_preload_ready_bool"] = False + score["domain_uses_strong_hsts_bool"] = False + + # Does the domain have weak crypto? 
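The scoring block that follows, and the CSV attachment code later in this file, both use the same flag-to-label pattern: walk a fixed list of (field, human-readable label) pairs and collect the labels whose flags are set on the scanned host record. A minimal sketch (the sample host record is illustrative):

    # Sketch of the (flag, label) rehydration pattern used below and in the
    # CSV attachment generation.
    WEAK_CRYPTO_LABELS = [
        ("sslv2", "SSLv2"),
        ("sslv3", "SSLv3"),
        ("any_3des", "3DES"),
        ("any_rc4", "RC4"),
    ]

    def weak_crypto_summary(host):
        """List the weak protocols/ciphers a scanned host supports."""
        return ", ".join(label for key, label in WEAK_CRYPTO_LABELS if host.get(key))

    sample = {"sslv2": False, "sslv3": True, "any_3des": True, "any_rc4": False}
    print(weak_crypto_summary(sample))  # -> SSLv3, 3DES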
+ score["domain_has_weak_crypto_bool"] = domain["domain_has_weak_crypto"] + if domain["live"] and not domain["domain_has_weak_crypto"]: + if not domain["ocsp_domain"]: + self.__domain_has_no_weak_crypto_count += 1 + # Build list of weak crypto host info and save it in + # score['hosts_with_weak_crypto'] + score["hosts_with_weak_crypto"] = list() + for host in domain["hosts_with_weak_crypto"]: + weak_crypto_list = list() + for (wc_key, wc_text) in [ + ("sslv2", "SSLv2"), + ("sslv3", "SSLv3"), + ("any_3des", "3DES"), + ("any_rc4", "RC4"), + ]: + if host[wc_key]: + weak_crypto_list.append(wc_text) + score["hosts_with_weak_crypto"].append( + { + "hostname": host["scanned_hostname"], + "port": host["scanned_port"], + "weak_crypto_list_str": ", ".join(weak_crypto_list), + } + ) + + # Does the domain have a Symantec cert? + # If so, they have to be replaced - see: + # https://www.symantec.com/connect/blogs/information-replacement-symantec-ssltls-certificates + score["domain_has_symantec_cert_bool"] = domain["domain_has_symantec_cert"] + + # BOD 18-01 compliant? + if ( + ( + domain["domain_supports_https"] + and domain["domain_enforces_https"] + and domain["domain_uses_strong_hsts"] + ) + or ( + domain["live"] + and ( + domain["hsts_base_domain_preloaded"] + or ( + not domain["https_full_connection"] + and domain["https_client_auth_required"] + ) + ) + ) + ) and not domain["domain_has_weak_crypto"]: + score["bod_1801_compliance"] = True + if not domain["ocsp_domain"]: + self.__bod_1801_count += 1 + else: + score["bod_1801_compliance"] = False + + if domain.get("subdomains"): # if this domain has any subdomains + for subdomain in domain["subdomains"]: + subdomain_score = self.__score_domain(subdomain) + if subdomain_score["live_bool"]: # Only add live + # subdomains add this subdomain's score to this + # domain's list of subdomain_scores + score["subdomain_scores"].append(subdomain_score) + return score + + def __populate_report_doc(self): + # index = 0 + # sort list of all domains + self.__all_domains.sort(key=lambda x: x["domain"]) + # sort list of base domains + self.__base_domains.sort(key=lambda x: x["domain"]) + + # Go through each base domain and score the attributes + for domain in self.__base_domains: + score = self.__score_domain(domain) + # Add domain's score to master list of scores + self.__report_doc["scores"].append(score) + + if not self.__all_eligible_domains_count: + # TODO Decide if we want to generate an empty report in this case + print( + f'ERROR: "{self.__agency}" has no live domains - exiting without generating report!' + ) + sys.exit(-1) + + self.__uses_https_percentage = round( + self.__domain_supports_https_count + / self.__all_eligible_domains_count + * 100.0, + 1, + ) + self.__enforces_https_percentage = round( + self.__domain_enforces_https_count + / self.__all_eligible_domains_count + * 100.0, + 1, + ) + self.__hsts_percentage = round( + self.__domain_uses_strong_hsts_count + / self.__all_eligible_domains_count + * 100.0, + 1, + ) + self.__has_no_weak_crypto_percentage = round( + self.__domain_has_no_weak_crypto_count + / self.__all_eligible_domains_count + * 100, + 1, + ) + self.__bod_1801_percentage = round( + self.__bod_1801_count / self.__all_eligible_domains_count * 100.0, 1 + ) + + # self.__write_to_overview() # generates ARTIFACTS_DIR + + # "/reporting.csv" - is this still needed? + + def __latex_escape(self, to_escape): + return "".join(LATEX_ESCAPE_MAP.get(i, i) for i in to_escape) + + def __latex_escape_structure(self, data): + """Escape data for LaTeX. 
+ + Note that this method assumes that all sequences contain + dicts. + """ + if isinstance(data, dict): + for k, v in data.items(): + if k.endswith("_tex"): # skip special tex values + continue + if isinstance(v, str): + data[k] = self.__latex_escape(v) + else: + self.__latex_escape_structure(v) + elif isinstance(data, (list, tuple)): + for i in data: + self.__latex_escape_structure(i) + + def generate_https_scan_report(self): + """Generate the PSHTT report.""" + print("\tParsing data") + # build up the report_doc from the query results + self.__populate_report_doc() + + # sort org lists + if self.__https_compliance_list: + self.__https_compliance_list.sort(key=lambda x: x["domain"]) + if self.__non_https_compliance_list: + self.__non_https_compliance_list.sort(key=lambda x: x["domain"]) + + # create a working directory + original_working_dir = os.getcwd() + if self.__debug: + temp_working_dir = tempfile.mkdtemp(dir=original_working_dir) + else: + temp_working_dir = tempfile.mkdtemp() + + # setup the working directory + self.__setup_work_directory(temp_working_dir) + os.chdir(temp_working_dir) + + print("\tGenerating attachments") + # generate attachments + self.__generate_attachments() + + print("\tGenerating charts") + # generate charts + self.__generate_charts() + + # generate json input to mustache + self.__generate_mustache_json(REPORT_JSON) + + # generate latex json + mustache + self.__generate_latex(MUSTACHE_FILE, REPORT_JSON, REPORT_TEX) + + print("\tAssembling PDF") + # generate report figures + latex + self.__generate_final_pdf() + + # revert working directory + os.chdir(original_working_dir) + + # copy report and json file to original working directory + # and delete working directory + if not self.__debug: + src_filename = os.path.join(temp_working_dir, REPORT_PDF) + datestamp = self.__generated_time.strftime("%Y-%m-%d") + dest_dir = "." 
+ + if self.__agency_id is not None: + dest_filename = ( + f"{dest_dir}/cyhy-{self.__agency_id}-{datestamp}-https-report.pdf" + ) + else: + dest_filename = ( + f"{dest_dir}/cyhy-{self.__agency}-{datestamp}-https-report.pdf" + ) + + shutil.move(src_filename, dest_filename) + return self.__results + + def __setup_work_directory(self, work_dir): + me = os.path.realpath(__file__) + my_dir = os.path.dirname(me) + for n in (MUSTACHE_FILE, PDF_CAPTURE_JS): + file_src = os.path.join(my_dir, n) + file_dst = os.path.join(work_dir, n) + shutil.copyfile(file_src, file_dst) + # copy static assets + dir_src = os.path.join(my_dir, ASSETS_DIR_SRC) + dir_dst = os.path.join(work_dir, ASSETS_DIR_DST) + shutil.copytree(dir_src, dir_dst) + + ########################################################################### + # Attachment Generation + ########################################################################### + def __generate_attachments(self): + self.__generate_https_attachment() + + def __generate_https_attachment(self): + header_fields = ( + "Domain", + "Base Domain", + "Domain Is Base Domain", + "Canonical URL", + "Live", + "Redirect", + "Redirect To", + "Valid HTTPS", + "Defaults to HTTPS", + "Downgrades HTTPS", + "Strictly Forces HTTPS", + "HTTPS Bad Chain", + "HTTPS Bad Hostname", + "HTTPS Expired Cert", + "HTTPS Self Signed Cert", + "HSTS", + "HSTS Header", + "HSTS Max Age", + "HSTS Entire Domain", + "HSTS Preload Ready", + "HSTS Preload Pending", + "HSTS Preloaded", + "Base Domain HSTS Preloaded", + "Domain Supports HTTPS", + "Domain Enforces HTTPS", + "Domain Uses Strong HSTS", + "HTTPS Client Auth Required", + "Domain Supports Weak Crypto", + "Web Hosts With Weak Crypto", + "Domain Uses Symantec Certificate", + "OCSP Domain", + "Unknown Error", + ) + data_fields = ( + "domain", + "base_domain", + "is_base_domain", + "canonical_url", + "live", + "redirect", + "redirect_to", + "valid_https", + "defaults_https", + "downgrades_https", + "strictly_forces_https", + "https_bad_chain", + "https_bad_hostname", + "https_expired_cert", + "https_self_signed_cert", + "hsts", + "hsts_header", + "hsts_max_age", + "hsts_entire_domain", + "hsts_preload_ready", + "hsts_preload_pending", + "hsts_preloaded", + "hsts_base_domain_preloaded", + "domain_supports_https", + "domain_enforces_https", + "domain_uses_strong_hsts", + "https_client_auth_required", + "domain_has_weak_crypto", + "hosts_with_weak_crypto_str", + "domain_has_symantec_cert", + "ocsp_domain", + "unknown_error", + ) + with open(HTTPS_RESULTS_CSV_FILE, newline="", mode="w") as out_file: + header_writer = csv.DictWriter( + out_file, header_fields, extrasaction="ignore" + ) + header_writer.writeheader() + data_writer = csv.DictWriter(out_file, data_fields, extrasaction="ignore") + + def rehydrate_hosts_with_weak_crypto(d): + """Build a string suitable for output. + + Parameters + ---------- + d : dict + The hosts_with_weak_crypto dictionary + + Returns + ------- + str: The string with weak crypto host details. + """ + hostname = d["scanned_hostname"] + port = d["scanned_port"] + + weak_crypto_list = list() + for (wc_key, wc_text) in [ + ("sslv2", "SSLv2"), + ("sslv3", "SSLv3"), + ("any_3des", "3DES"), + ("any_rc4", "RC4"), + ]: + if d[wc_key]: + weak_crypto_list.append(wc_text) + result = f"{hostname}:{port} [supports: {','.join(weak_crypto_list)}]" + + return result + + def format_list(record_list): + """Format a list into a string to increase CSV readability.""" + # record_list should only be a list, not an integer, None, or + # anything else. 
Thus this if clause handles only empty lists. + # This makes a "null" appear in the JSON output for empty + # lists, as expected. + if not record_list: + return None + + return ", ".join(record_list) + + for domain in self.__all_domains: + hosts_with_weak_crypto = [ + rehydrate_hosts_with_weak_crypto(d) + for d in domain["hosts_with_weak_crypto"] + ] + domain["hosts_with_weak_crypto_str"] = format_list( + hosts_with_weak_crypto + ) + data_writer.writerow(domain) + + ########################################################################### + # Chart Generation + ########################################################################### + def __generate_charts(self): + graphs.setup() + self.__generate_bod_1801_components_bar_chart() + self.__generate_donut_charts() + + def __generate_bod_1801_components_bar_chart(self): + bod_1801_bar = graphs.MyTrustyBar( + percentage_list=[ + self.__uses_https_percentage, + self.__enforces_https_percentage, + self.__hsts_percentage, + self.__has_no_weak_crypto_percentage, + ], + label_list=[ + "Uses\nHTTPS", + "Enforces\nHTTPS", + "Uses Strong\nHSTS", + "No SSLv2/v3,\n3DES,RC4", + ], + fill_color=graphs.DARK_BLUE, + title="BOD 18-01 HTTPS Components", + ) + bod_1801_bar.plot(filename="bod-18-01-https-components") + + def __generate_donut_charts(self): + bod_1801_donut = graphs.MyDonutPie( + percentage_full=round(self.__bod_1801_percentage), + label="BOD 18-01\nCompliant\n(Web)", + fill_color=graphs.DARK_BLUE, + ) + bod_1801_donut.plot(filename="bod-18-01-compliant") + + ########################################################################### + # Final Document Generation and Assembly + ########################################################################### + def __generate_mustache_json(self, filename): + # result = {'all_domains':self.__all_domains} + result = {"report_doc": self.__report_doc} + result["ineligible_domains"] = self.__ineligible_domains + result["domain_count"] = self.__domain_count + result["subdomain_count"] = self.__subdomain_count + result["base_domain_count"] = self.__base_domain_count + result["all_eligible_domains_count"] = self.__all_eligible_domains_count + result["eligible_domains_count"] = self.__eligible_domains_count + result["eligible_subdomains_count"] = self.__eligible_subdomains_count + result["https_compliance_list"] = self.__https_compliance_list + result["non_https_compliance_list"] = self.__non_https_compliance_list + result["title_date_tex"] = self.__generated_time.strftime("{%d}{%m}{%Y}") + result["agency"] = self.__agency + result["agency_id"] = self.__agency_id + result["strictly_forces_percentage"] = round( + self.__strictly_forces_count / self.__domain_count * 100.0, 1 + ) + result["downgrades_percentage"] = round( + self.__downgrades_count / self.__domain_count * 100.0, 1 + ) + result["hsts_percentage"] = self.__hsts_percentage + result["hsts_preloaded_percentage"] = round( + self.__hsts_preloaded_count / self.__domain_count * 100.0, 1 + ) + result["hsts_entire_domain_percentage"] = round( + self.__hsts_entire_domain_count / self.__domain_count * 100.0, 1 + ) + # result['strictly_forces_percentage'] = 0 + # result['downgrades_percentage'] = 0 + # result['hsts_preloaded_percentage'] = 0 + # result['hsts_entire_domain_percentage'] = 0 + result[ + "domain_has_no_weak_crypto_count" + ] = self.__domain_has_no_weak_crypto_count + result["has_no_weak_crypto_percentage"] = self.__has_no_weak_crypto_percentage + result["bod_1801_percentage"] = self.__bod_1801_percentage + result["bod_1801_count"] = 
self.__bod_1801_count
+        result[
+            "domain_supports_https_count"
+        ] = self.__domain_supports_https_count  # added
+        result["uses_https_percentage"] = self.__uses_https_percentage
+        result["enforces_https_percentage"] = self.__enforces_https_percentage
+        result["strictly_forces_count"] = self.__strictly_forces_count
+        result["domain_enforces_https_count"] = self.__domain_enforces_https_count
+        result["hsts_count"] = self.__hsts_count
+        result["hsts_preloaded_count"] = self.__hsts_preloaded_count
+        result["hsts_preload_ready_count"] = self.__hsts_preload_ready_count
+        result["domain_uses_strong_hsts_count"] = self.__domain_uses_strong_hsts_count
+        result["https_expired_cert_count"] = self.__https_expired_cert_count
+        result["https_bad_hostname_count"] = self.__https_bad_hostname_count
+        result["https_bad_chain_count"] = self.__https_bad_chain_count
+        result["hsts_low_max_age_count"] = self.__hsts_low_max_age_count
+
+        self.__latex_escape_structure(result["report_doc"])
+
+        with open(filename, "w") as out:
+            out.write(json.dumps(result))
+
+    def __generate_latex(self, mustache_file, json_file, latex_file):
+        # Use a context manager so the template file handle is closed
+        # promptly
+        with codecs.open(mustache_file, "r", encoding="utf-8") as template_file:
+            template = template_file.read()
+
+        with codecs.open(json_file, "r", encoding="utf-8") as data_file:
+            data = json.load(data_file)
+
+        r = chevron.render(template, data)
+        with codecs.open(latex_file, "w", encoding="utf-8") as output:
+            output.write(r)
+
+    def __generate_final_pdf(self):
+        xelatex = ["/usr/bin/xelatex", REPORT_TEX]
+        # As usual, LaTeX requires two passes to correctly set all
+        # references in the document.
+        #
+        # Bandit frowns upon the use of subprocess, but we need it
+        # here. Hence the nosec.
+        subprocess.run(xelatex)  # nosec B603
+        subprocess.run(xelatex)  # nosec B603
+
+
+def main():
+    """Create PSHTT Agency Report PDF."""
+    args = docopt(__doc__, version="v0.0.1")
+    db = db_from_config(DB_CONFIG_FILE)
+
+    print(f"Generating HTTPS Report for {args['AGENCY']}...")
+    generator = ReportGenerator(db, args["AGENCY"], debug=args["--debug"])
+    generator.generate_https_scan_report()
+    print("Done")
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/report/graphs.py b/src/report/graphs.py
old mode 100755
new mode 100644
similarity index 53%
rename from report/graphs.py
rename to src/report/graphs.py
index 8186f0e..f2df878
--- a/report/graphs.py
+++ b/src/report/graphs.py
@@ -1,40 +1,50 @@
+"""Create graphs for PSHTT reports."""
+
+# Standard Python Libraries
 import math
-import numpy as np
-import matplotlib
-matplotlib.use('Agg')
-import matplotlib.pyplot as plt
-import matplotlib as mpl
-from matplotlib.patches import Rectangle, Ellipse
-from matplotlib.collections import PatchCollection
-from matplotlib.ticker import MaxNLocator
-from matplotlib.dates import DateFormatter
 from textwrap import TextWrapper

+# Third-Party Libraries
+import matplotlib
+
+# This line is oddly located, but it must come before the
+# matplotlib.pyplot import below. This also creates a bunch of flake8
+# E402 errors for the imports that follow because they are not at the
+# top of the file, so I added a noqa comment for each of those lines.
+matplotlib.use("Agg") +# Third-Party Libraries +from matplotlib.collections import PatchCollection # noqa: E402 +from matplotlib.dates import DateFormatter # noqa: E402 +from matplotlib.patches import Ellipse, Rectangle # noqa: E402 +import matplotlib.pyplot as plt # noqa: E402 +from matplotlib.ticker import MaxNLocator # noqa: E402 +import numpy as np # noqa: E402 + # Blue, Green, Yellow, Orange, Red, -BLUE = '#5c90ba' -GREEN = '#7bbe5e' -YELLOW = '#cfc666' -ORANGE = '#cf9c66' -RED = '#c66270' +BLUE = "#5c90ba" +GREEN = "#7bbe5e" +YELLOW = "#cfc666" +ORANGE = "#cf9c66" +RED = "#c66270" COLORS = (BLUE, YELLOW, ORANGE, RED, GREEN) # vuln colors first, then green -DARK_BLUE = '#3c698e' -DARK_GREEN = '#56943c' -DARK_YELLOW = '#b1a738' -DARK_ORANGE = '#b17638' -DARK_RED = '#a13a49' +DARK_BLUE = "#3c698e" +DARK_GREEN = "#56943c" +DARK_YELLOW = "#b1a738" +DARK_ORANGE = "#b17638" +DARK_RED = "#a13a49" COLORS_DARK = (DARK_BLUE, DARK_YELLOW, DARK_ORANGE, DARK_RED, DARK_GREEN) -LIGHT_BLUE = '#92b5d1' -LIGHT_GREEN = '#a8d494' -LIGHT_YELLOW = '#e1dca0' -LIGHT_ORANGE = '#e1c2a0' -LIGHT_RED = '#e8c0c5' +LIGHT_BLUE = "#92b5d1" +LIGHT_GREEN = "#a8d494" +LIGHT_YELLOW = "#e1dca0" +LIGHT_ORANGE = "#e1c2a0" +LIGHT_RED = "#e8c0c5" COLORS_LIGHT = (LIGHT_BLUE, LIGHT_YELLOW, LIGHT_ORANGE, LIGHT_RED, LIGHT_GREEN) -GREY_LIGHT = '#e8e8e8' -GREY_MID = '#cecece' -GREY_DARK = '#a1a1a1' +GREY_LIGHT = "#e8e8e8" +GREY_MID = "#cecece" +GREY_DARK = "#a1a1a1" PIE_COLORS = COLORS + COLORS_DARK + COLORS_LIGHT @@ -44,6 +54,7 @@ def setup(): + """Perform initial setup.""" # Get this from LaTeX using \showthe\columnwidth (see *.width # file) fig_width_pt = 505.89 @@ -52,20 +63,21 @@ def setup(): fig_width = fig_width_pt * inches_per_pt # width in inches fig_height = fig_width * golden_mean # height in inches fig_size = [fig_width, fig_height] - params = {'backend': 'pdf', - # 'font.family': 'sans-serif', - # 'font.sans-serif': ['Avenir Next'], - 'axes.labelsize': 10, - 'legend.fontsize': 8, - 'xtick.labelsize': 8, - 'ytick.labelsize': 8, - 'font.size': 10, - 'text.usetex': False, - 'figure.figsize': fig_size} + params = { + "backend": "pdf", + "axes.labelsize": 10, + "legend.fontsize": 8, + "xtick.labelsize": 8, + "ytick.labelsize": 8, + "font.size": 10, + "text.usetex": False, + "figure.figsize": fig_size, + } plt.rcParams.update(params) def wrapLabels(labels, width): + """Word-wrap labels.""" wrapper = TextWrapper(width=width, break_long_words=False) result = [] for label in labels: @@ -73,49 +85,62 @@ def wrapLabels(labels, width): return result -class MyMessage(object): +class MyMessage: + """A message chart.""" + def __init__(self, message): + """Initialize.""" self.message = message def plot(self, filename, size=1.0): + """Create the graph.""" fig = plt.figure(1) fig.set_size_inches(fig.get_size_inches() * size) ax = fig.add_subplot(1, 1, 1) ax.xaxis.set_visible(False) ax.yaxis.set_visible(False) - ax.spines['left'].set_visible(False) - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - ax.spines['bottom'].set_visible(False) - ax.text(0.5, 0.5, self.message, - horizontalalignment='center', - verticalalignment='center', - fontsize=20 * size, color=DARK_GREEN, - transform=ax.transAxes) - plt.savefig(filename + '.pdf') + ax.spines["left"].set_visible(False) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.spines["bottom"].set_visible(False) + ax.text( + 0.5, + 0.5, + self.message, + horizontalalignment="center", + verticalalignment="center", + fontsize=20 * size, + 
color=DARK_GREEN, + transform=ax.transAxes, + ) + plt.savefig(filename + ".pdf") plt.close() -class MyStackedBar(object): +class MyStackedBar: + """A stacked bar chart.""" + def __init__(self, data, ylabels, dataLabels): + """Initialize.""" self.data = data self.ylabels = ylabels self.dataLabels = dataLabels def plot(self, filename, size=1.0): + """Create the graph.""" pos = np.arange(len(self.ylabels))[::-1] fig = plt.figure(1) fig.set_size_inches(fig.get_size_inches() * size) # fig.subplots_adjust(left=0.15, bottom=0.15) ax = fig.add_subplot(1, 1, 1) - plt.xlabel('Vulnerabilities') + plt.xlabel("Vulnerabilities") majorLocator = MaxNLocator(nbins=5, integer=True) # only mark integers ax.xaxis.set_major_locator(majorLocator) - ax.spines['left'].set_visible(False) - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) + ax.spines["left"].set_visible(False) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) ax.xaxis.tick_bottom() ax.yaxis.tick_left() @@ -123,19 +148,30 @@ def plot(self, filename, size=1.0): bars = [] i = 0 for dataset in self.data: - p = ax.barh(pos, dataset, align='center', color=COLORS[i], - edgecolor='white', left=lefts) + p = ax.barh( + pos, + dataset, + align="center", + color=COLORS[i], + edgecolor="white", + left=lefts, + ) lefts = map(lambda x, y: x + y, lefts, dataset) i += 1 bars.append(p) plt.yticks(pos, self.ylabels, rotation=None, fontsize=8) try: - leg = plt.legend(bars, self.dataLabels, ncol=len(self.dataLabels), - loc='upper center', fancybox=True, - prop={'size': 4}) + leg = plt.legend( + bars, + self.dataLabels, + ncol=len(self.dataLabels), + loc="upper center", + fancybox=True, + prop={"size": 4}, + ) leg.get_frame().set_alpha(0.5) - except IndexError as e: + except IndexError: pass # if there are no bars, the legend will throw a IndexError # it should be safe to ignore, but there will be no legend @@ -147,27 +183,44 @@ def plot(self, filename, size=1.0): # decimal point and 0 by converting width to int type width = int(rect.get_width()) - labelString = '{:,d}'.format(width) + labelString = f"{width:,}" # TODO handle too labels getting squeezed, need box # width in points - if (width > 0): + if width > 0: xloc = rect.get_x() + 0.5 * width - clr = 'white' + clr = "white" # Center the text vertically in the bar yloc = rect.get_y() + rect.get_height() / 2.0 - ax.annotate(labelString, xy=(xloc, yloc), xycoords='data', - xytext=(-4, 0), textcoords='offset points', - size=12, va='center', weight='bold', color=clr) + ax.annotate( + labelString, + xy=(xloc, yloc), + xycoords="data", + xytext=(-4, 0), + textcoords="offset points", + size=12, + va="center", + weight="bold", + color=clr, + ) ax.set_ylim([-0.5, 5]) fig.set_tight_layout(True) - plt.savefig(filename + '.pdf') + plt.savefig(f"{filename}.pdf") plt.close() -class MyBar(object): - def __init__(self, series, yscale='linear', - bigLabels=False, barSeverities=None, legendLabels=None): +class MyBar: + """A bar chart.""" + + def __init__( + self, + series, + yscale="linear", + bigLabels=False, + barSeverities=None, + legendLabels=None, + ): + """Initialize.""" self.series = series self.yscale = yscale self.bigLabels = bigLabels @@ -175,6 +228,7 @@ def __init__(self, series, yscale='linear', self.legendLabels = legendLabels def plot(self, filename, size=1.0): + """Create the graph.""" fig = plt.figure(1) fig.set_size_inches(fig.get_size_inches() * size) @@ -198,40 +252,58 @@ def plot(self, filename, size=1.0): legendColors = [] for i in 
range(len(self.legendLabels)):
                 legendColors.append(COLORS[i])
-            dummy_legend_rects = plt.bar(pos, self.series.values,
-                                         align='center',
-                                         color=legendColors,
-                                         edgecolor='white', width=0.5)
-            leg = plt.legend(dummy_legend_rects, self.legendLabels,
-                             ncol=len(self.legendLabels),
-                             loc='upper center',
-                             fancybox=True, prop={'size': 4},
-                             bbox_to_anchor=(0.5, 1.2))
+            dummy_legend_rects = plt.bar(
+                pos,
+                self.series.values,
+                align="center",
+                color=legendColors,
+                edgecolor="white",
+                width=0.5,
+            )
+            leg = plt.legend(
+                dummy_legend_rects,
+                self.legendLabels,
+                ncol=len(self.legendLabels),
+                loc="upper center",
+                fancybox=True,
+                prop={"size": 4},
+                bbox_to_anchor=(0.5, 1.2),
+            )
             leg.get_frame().set_alpha(0.5)
-            rects = plt.bar(pos, self.series.values, align='center',
-                            color=barColors, edgecolor='white', width=0.5)
+            rects = plt.bar(
+                pos,
+                self.series.values,
+                align="center",
+                color=barColors,
+                edgecolor="white",
+                width=0.5,
+            )
         else:
-            rects = plt.bar(pos, self.series.values, align='center',
-                            color=BLUE, edgecolor='white', width=0.5)
+            rects = plt.bar(
+                pos,
+                self.series.values,
+                align="center",
+                color=BLUE,
+                edgecolor="white",
+                width=0.5,
+            )

         if self.bigLabels:
-            plt.xticks(pos, wrapLabels(self.series.index, 24),
-                       rotation=55, fontsize=7)
+            plt.xticks(pos, wrapLabels(self.series.index, 24), rotation=55, fontsize=7)
             # Extremely nice function to auto-rotate the x axis labels.
             # It was made for dates (hence the name) but it works
             # for any long x tick labels
             # fig.autofmt_xdate()
         else:
-            plt.xticks(pos, wrapLabels(self.series.index, 6),
-                       rotation=None, fontsize=8)
+            plt.xticks(pos, wrapLabels(self.series.index, 6), rotation=None, fontsize=8)

         ax.yaxis.grid(False)
         ax.yaxis.tick_left()  # ticks only on left
         ax.yaxis.set_visible(False)
         ax.xaxis.tick_bottom()  # ticks only on bottom
-        ax.spines['left'].set_visible(False)
-        ax.spines['top'].set_visible(False)
-        ax.spines['right'].set_visible(False)
+        ax.spines["left"].set_visible(False)
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)

         for rect in rects:
             # Rectangle widths are already integer-valued but are
@@ -247,29 +319,47 @@ def plot(self, filename, size=1.0):

             if axes_coords[1] < 0.30:
                 # above box
-                color = 'black'
+                color = "black"
                 offset = (0, 7)
             else:
                 # in box
-                color = 'white'
+                color = "white"
                 offset = (0, -14)

-            labelString = '{:,d}'.format(yloc)
+            labelString = f"{yloc:,}"

-            ax.annotate(labelString, xy=(xloc, yloc), xycoords='data',
-                        xytext=offset, textcoords='offset points',
-                        size=12, ha='center', weight='bold', color=color
-                        )
+            ax.annotate(
+                labelString,
+                xy=(xloc, yloc),
+                xycoords="data",
+                xytext=offset,
+                textcoords="offset points",
+                size=12,
+                ha="center",
+                weight="bold",
+                color=color,
+            )

         fig.set_tight_layout(True)
-        plt.savefig(filename + '.pdf')
+        plt.savefig(f"{filename}.pdf")
         plt.close()


-class MyDistributionBar(object):
-    def __init__(self, series, yscale='linear', xlabel=None, ylabel=None,
-                 final_bucket_accumulate=False,
-                 x_major_tick_count=10, region_colors=[], x_limit_extra=0):
+class MyDistributionBar:
+    """A distribution bar chart."""
+
+    def __init__(
+        self,
+        series,
+        yscale="linear",
+        xlabel=None,
+        ylabel=None,
+        final_bucket_accumulate=False,
+        x_major_tick_count=10,
+        region_colors=(),  # a tuple avoids a mutable default argument
+        x_limit_extra=0,
+    ):
+        """Initialize."""
         self.series = series
         self.yscale = yscale
         self.xlabel = xlabel
@@ -282,6 +372,7 @@ def __init__(self, series, yscale='linear', xlabel=None, ylabel=None,
         self.x_limit_extra = x_limit_extra

     def plot(self, filename, size=1.0):
+        """Create the graph."""
         fig = plt.figure(figsize=(8, 2.75))
         fig.set_size_inches(fig.get_size_inches() * size)
         ax = fig.add_subplot(1, 1, 1)
@@ -299,10 +390,16 @@ def plot(self, filename, size=1.0):
         tick_labels = list(self.series.index)

         if self.final_bucket_accumulate:
-            tick_labels[-1] = '{}+'.format(tick_labels[-1])
-
-        plt.bar(pos, self.series.values, tick_label=tick_labels,
-                align='center', color='#000000', edgecolor='#000000')
+            tick_labels[-1] = f"{tick_labels[-1]}+"
+
+        plt.bar(
+            pos,
+            self.series.values,
+            tick_label=tick_labels,
+            align="center",
+            color="#000000",
+            edgecolor="#000000",
+        )

         y_max = ax.get_ylim()[1]
         # Colorize regions and add dividing lines if region_colors
@@ -310,21 +407,39 @@ def plot(self, filename, size=1.0):
         previous_day = 0
         for (day, bgcolor) in self.region_colors:
             # draw reference lines
-            plt.axvline(x=day, color='#777777', linewidth=0.5)
-            ax.annotate('{} Days '.format(day), xy=(day - 1, y_max),
-                        rotation='vertical', fontsize=7, color='#666666',
-                        ha='right', va='top')
+            plt.axvline(x=day, color="#777777", linewidth=0.5)
+            ax.annotate(
+                f"{day} Days ",
+                xy=(day - 1, y_max),
+                rotation="vertical",
+                fontsize=7,
+                color="#666666",
+                ha="right",
+                va="top",
+            )
             ax.add_patch(
-                Rectangle((previous_day, 0), day - previous_day, y_max,
-                          facecolor=bgcolor, alpha=0.4, edgecolor=None,
-                          zorder=0))
+                Rectangle(
+                    (previous_day, 0),
+                    day - previous_day,
+                    y_max,
+                    facecolor=bgcolor,
+                    alpha=0.4,
+                    edgecolor=None,
+                    zorder=0,
+                )
+            )
             previous_day = day

-        ax.add_patch(Rectangle((previous_day, 0),
-                               (self.series.index[-1] - previous_day +
-                                self.x_limit_extra),
-                               y_max,
-                               facecolor='#000000', alpha=0.4, edgecolor=None,
-                               zorder=0))
+        ax.add_patch(
+            Rectangle(
+                (previous_day, 0),
+                (self.series.index[-1] - previous_day + self.x_limit_extra),
+                y_max,
+                facecolor="#000000",
+                alpha=0.4,
+                edgecolor=None,
+                zorder=0,
+            )
+        )

         tick_interval = len(self.series) / (self.x_major_tick_count - 1)
         for i, tick in enumerate(ax.xaxis.get_major_ticks()):
@@ -332,32 +447,36 @@ def plot(self, filename, size=1.0):
                 tick.set_visible(False)
             else:
                 tick.set_visible(True)
-                tick.set_label('{}'.format(self.series.index[i]))
+                tick.set_label(str(self.series.index[i]))
             if self.final_bucket_accumulate:
                 # Show final tick (just in case it isn't already visible)
                 tick.set_visible(True)

-        ax.tick_params(direction='out')  # put ticks on the outside of the axes
+        ax.tick_params(direction="out")  # put ticks on the outside of the axes
         ax.yaxis.grid(True)
         ax.yaxis.tick_left()  # ticks only on left
         ax.yaxis.set_visible(True)
         ax.xaxis.tick_bottom()  # ticks only on bottom
-        ax.spines['top'].set_visible(False)
-        ax.spines['right'].set_visible(False)
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)

         fig.set_tight_layout(True)
-        plt.savefig(filename + '.pdf')
+        plt.savefig(filename + ".pdf")
         plt.close()


-class MyPie(object):
+class MyPie:
+    """A pie chart."""
+
     def __init__(self, data, labels, explode=None, showValue=False):
+        """Initialize."""
         self.data = data
         self.labels = wrapLabels(labels, 20)
         self.explode = explode
         self.showValue = showValue

     def left_right(self, trips):
+        """Lefts and rights."""
         lefts = []
         rights = []
         for inner, outer, wedge in trips:
@@ -369,6 +488,7 @@ def left_right(self, trips):
         return lefts, rights

     def too_close(self, trips):
+        """Determine if we are too close."""
         if len(trips) <= 1:
             return False
         for (inner, outer, wedge) in trips:
@@ -377,6 +497,7 @@ def too_close(self, trips):
         return False

     def wedge_edge(self, wedge):
+        """Get the edge of
a wedge.""" theta = np.pi * (wedge.theta1 + wedge.theta2) / 180 wedge_x, wedge_y = wedge.center x = wedge_x + wedge.r * np.cos(theta / 2.0) @@ -384,6 +505,7 @@ def wedge_edge(self, wedge): return (x, y) def layout_labels(self, trips, ax, right_side=True): + """Layout the labels.""" bottom, top = ax.get_ylim() left, right = ax.get_xlim() if right_side: @@ -391,60 +513,80 @@ def layout_labels(self, trips, ax, right_side=True): else: new_ys = np.linspace(top, bottom * 0.8, len(trips)) # distribute - trips.sort(key=lambda x: self.wedge_edge(x[2])[1], - reverse=not right_side) # sort by wedge_edge y + trips.sort( + key=lambda x: self.wedge_edge(x[2])[1], reverse=not right_side + ) # sort by wedge_edge y for (inner, outer, wedge), y in zip(trips, new_ys): outer.set_visible(False) # hide the old label if wedge.theta2 - wedge.theta1 > TOO_SMALL_WEDGE: new_text = outer.get_text() # transfer old label text else: - new_text = '%s\n(%s)' % (outer.get_text(), inner.get_text()) + new_text = f"{outer.get_text()}\n({inner.get_text()})" # too small to show inner label, add to outer inner.set_visible(False) # create annotation from pos to wedge xy = self.wedge_edge(wedge) if right_side: - ax.annotate(new_text, xy=xy, xycoords='data', - xytext=(right * 1.8, y), textcoords='data', - size=6, va='top', ha='right', - arrowprops=dict(arrowstyle="-", mutation_scale=30, - connectionstyle="arc3"), - ) + ax.annotate( + new_text, + xy=xy, + xycoords="data", + xytext=(right * 1.8, y), + textcoords="data", + size=6, + va="top", + ha="right", + arrowprops=dict( + arrowstyle="-", mutation_scale=30, connectionstyle="arc3" + ), + ) else: - ax.annotate(new_text, xy=xy, xycoords='data', - xytext=(left * 1.8, y), textcoords='data', - size=6, va='top', ha='left', - arrowprops=dict(arrowstyle="-", mutation_scale=30, - connectionstyle="arc3"), - ) + ax.annotate( + new_text, + xy=xy, + xycoords="data", + xytext=(left * 1.8, y), + textcoords="data", + size=6, + va="top", + ha="left", + arrowprops=dict( + arrowstyle="-", mutation_scale=30, connectionstyle="arc3" + ), + ) def plot(self, filename, size=1.0): + """Create the graph.""" fig = plt.figure(1) fig.set_size_inches(fig.get_size_inches() * size) ax = fig.add_subplot(1, 1, 1) fig.subplots_adjust(left=0.25, right=0.75) wedges, outer_labels, inner_labels = plt.pie( - self.data, colors=PIE_COLORS, - explode=self.explode, labels=self.labels, - labeldistance=1.15, autopct='', pctdistance=0.65, shadow=False + self.data, + colors=PIE_COLORS, + explode=self.explode, + labels=self.labels, + labeldistance=1.15, + autopct="", + pctdistance=0.65, + shadow=False, ) for wedge in wedges: - wedge.set_edgecolor('white') + wedge.set_edgecolor("white") i = 0 total = sum(self.data) for label in inner_labels: label.set_fontsize(16.0 * size) # inner value label size - label.set_color('white') + label.set_color("white") if self.showValue: - label.set_text('{0}, {1:.0f}%'.format( - self.data[i], - float(self.data[i]) / total * 100.0) + label.set_text( + f"{self.data[i]}, {(float(self.data[i]) / total * 100.0):.0f}%" ) else: - label.set_text('%1.1f%%' % (self.data[i])) + label.set_text(f"{self.data[i]:1.1f}%") i += 1 for label in outer_labels: @@ -458,75 +600,95 @@ def plot(self, filename, size=1.0): if self.too_close(rights): self.layout_labels(rights, ax, True) - plt.savefig(filename + '.pdf') + plt.savefig(f"{filename}.pdf") plt.close() -class MyColorBar(object): - def __init__(self, agencyName, agencyScore, federalScore, label='Average'): +class MyColorBar: + """A color bar chart.""" + + def 
__init__(self, agencyName, agencyScore, federalScore, label="Average"): + """Initialize.""" self.agencyName = agencyName self.agencyScore = agencyScore self.federalScore = federalScore self.label = label def plot(self, filename, size=1.0): - cmap = mpl.cm.RdYlGn_r - norm = mpl.colors.Normalize(vmin=0, vmax=10) + """Create the graph.""" + cmap = matplotlib.cm.RdYlGn_r + norm = matplotlib.colors.Normalize(vmin=0, vmax=10) fig = plt.figure(figsize=(8, 2)) fig.set_size_inches(fig.get_size_inches() * size) - plt.axis('off') + plt.axis("off") ax2 = fig.add_axes([0.05, 0.25, 0.9, 0.15]) - cb1 = mpl.colorbar.ColorbarBase(ax2, cmap=cmap, - norm=norm, - orientation='horizontal') - cb1.set_label('CVSS Score') + cb1 = matplotlib.colorbar.ColorbarBase( + ax2, cmap=cmap, norm=norm, orientation="horizontal" + ) + cb1.set_label("CVSS Score") cb1.outline.set_visible(False) ax2.xaxis.tick_bottom() - if (self.agencyScore <= self.federalScore): + if self.agencyScore <= self.federalScore: agencyTextXY = (0.25, 0.66) federalTextXY = (0.75, 0.66) else: agencyTextXY = (0.75, 0.66) federalTextXY = (0.25, 0.66) - agencyLabel = '%s %s\n%1.2f' % (self.agencyName, - self.label, - self.agencyScore) - federalLabel = 'Federal %s\n%1.2f' % (self.label, self.federalScore) + agencyLabel = f"{self.agencyName} {self.label}\n{self.agencyScore:1.2f}" + federalLabel = f"Federal {self.label}\n{self.federalScore:1.2f}" ax2.annotate( - agencyLabel, xy=(self.agencyScore / 10, 1), - xycoords='data', - xytext=agencyTextXY, textcoords='figure fraction', - size=14, ha='center', + agencyLabel, + xy=(self.agencyScore / 10, 1), + xycoords="data", + xytext=agencyTextXY, + textcoords="figure fraction", + size=14, + ha="center", bbox=dict(boxstyle="round", fc="1.0", alpha=0.9), - arrowprops=dict(arrowstyle="fancy", mutation_scale=30, - fc="0.1", ec="none", - patchB=ax2, - connectionstyle="angle3,angleA=0,angleB=-90"), + arrowprops=dict( + arrowstyle="fancy", + mutation_scale=30, + fc="0.1", + ec="none", + patchB=ax2, + connectionstyle="angle3,angleA=0,angleB=-90", + ), ) ax2.annotate( - federalLabel, xy=(self.federalScore / 10, 1), - xycoords='data', - xytext=federalTextXY, textcoords='figure fraction', - size=14, ha='center', + federalLabel, + xy=(self.federalScore / 10, 1), + xycoords="data", + xytext=federalTextXY, + textcoords="figure fraction", + size=14, + ha="center", bbox=dict(boxstyle="round", fc="1.0", alpha=0.9), - arrowprops=dict(arrowstyle="fancy", mutation_scale=30, - fc="0.4", ec="none", - patchB=ax2, - connectionstyle="angle3,angleA=0,angleB=-90"), + arrowprops=dict( + arrowstyle="fancy", + mutation_scale=30, + fc="0.4", + ec="none", + patchB=ax2, + connectionstyle="angle3,angleA=0,angleB=-90", + ), ) fig.set_tight_layout(True) - plt.savefig(filename + '.pdf') + plt.savefig(f"{filename}.pdf") plt.close() -class MyLine(object): - def __init__(self, data_frame, linecolors, yscale='linear', - xlabel=None, ylabel=None): +class MyLine: + """A line chart.""" + + def __init__( + self, data_frame, linecolors, yscale="linear", xlabel=None, ylabel=None + ): + """Initialize.""" self.df = data_frame self.linecolors = linecolors self.yscale = yscale @@ -534,6 +696,7 @@ def __init__(self, data_frame, linecolors, yscale='linear', self.ylabel = ylabel def plot(self, filename, size=1.0, figsize=None): + """Create the graph.""" if figsize: fig = plt.figure(figsize=figsize) else: @@ -548,41 +711,55 @@ def plot(self, filename, size=1.0, figsize=None): colors = (c for c in self.linecolors) for col in self.df.columns: series = self.df[col] - 
series.plot(style='.-', color=colors.next(),
-                        linewidth=2, markersize=10)
-        leg = plt.legend(fancybox=True, loc='best')
+            series.plot(style=".-", color=next(colors), linewidth=2, markersize=10)
+        leg = plt.legend(fancybox=True, loc="best")
         # set the alpha value of the legend: it will be translucent
         leg.get_frame().set_alpha(0.5)

         # Force y-axis to go to 0 (must be done after plot)
         ax.set_ylim(ymin=0)

         fig.set_tight_layout(True)
-        plt.savefig(filename + '.pdf')
+        plt.savefig(f"{filename}.pdf")
         plt.close()


-class MyPentaLine(object):
+class MyPentaLine:
+    """A penta line chart."""
+
     def __init__(self, data_frame):
+        """Initialize."""
         self.df = data_frame

-    def plot_four(self, axis, column, color1, color2,
-                  last=False, tick_right=False):
-        axis.text(0.025, 0.75, column.title(), fontsize='small',
-                  horizontalalignment='left',
-                  transform=axis.transAxes)
+    def plot_four(self, axis, column, color1, color2, last=False, tick_right=False):
+        """Plot four of them."""
+        axis.text(
+            0.025,
+            0.75,
+            column.title(),
+            fontsize="small",
+            horizontalalignment="left",
+            transform=axis.transAxes,
+        )
         yloc = plt.MaxNLocator(4, integer=True)
         axis.yaxis.set_major_locator(yloc)
         if tick_right:
             axis.yaxis.tick_right()
-        for prefix, style in [('', 'solid'), ('world_', 'dotted')]:
+        for prefix, style in [("", "solid"), ("world_", "dotted")]:
             for col, color in [
-                ('host_count', color1),
-                ('vulnerable_host_count', color2)
+                ("host_count", color1),
+                ("vulnerable_host_count", color2),
             ]:
                 df = self.df[prefix + column] * 1.0 / self.df[prefix + col]
                 df = df.fillna(0)
-                df.plot(ax=axis, label=prefix + column, grid=False,
-                        color=color, linewidth=2, linestyle=style,
-                        marker='.', markersize=10)
+                df.plot(
+                    ax=axis,
+                    label=prefix + column,
+                    grid=False,
+                    color=color,
+                    linewidth=2,
+                    linestyle=style,
+                    marker=".",
+                    markersize=10,
+                )
         if not last:
             # axis.tick_params(axis='x', labelcolor='white') #nope
@@ -590,55 +767,57 @@ def plot_four(self, axis, column, color1, color2,
             axis.xaxis.set_visible(False)  # kinda: lost upper ticks

     def plot(self, filename, size=1.0):
+        """Create the graph."""
         # Three subplots sharing both x/y axes
         fig, axes = plt.subplots(nrows=5, ncols=1, sharex=True, sharey=True)
         fig.set_size_inches(fig.get_size_inches() * size)
-        self.plot_four(axes[0], 'total', LIGHT_GREEN, DARK_GREEN)
-        self.plot_four(axes[1], 'critical', LIGHT_RED, DARK_RED,
-                       tick_right=True)
-        self.plot_four(axes[2], 'high', LIGHT_ORANGE, DARK_ORANGE)
-        self.plot_four(axes[3], 'medium', LIGHT_YELLOW, DARK_YELLOW,
-                       tick_right=True)
-        self.plot_four(axes[4], 'low', LIGHT_BLUE, DARK_BLUE, last=True)
+        self.plot_four(axes[0], "total", LIGHT_GREEN, DARK_GREEN)
+        self.plot_four(axes[1], "critical", LIGHT_RED, DARK_RED, tick_right=True)
+        self.plot_four(axes[2], "high", LIGHT_ORANGE, DARK_ORANGE)
+        self.plot_four(axes[3], "medium", LIGHT_YELLOW, DARK_YELLOW, tick_right=True)
+        self.plot_four(axes[4], "low", LIGHT_BLUE, DARK_BLUE, last=True)
         # fig.subplots_adjust(bottom=0.20)

         # build a generic legend to represent all the subplots
-        dark_solid_line = plt.Line2D((0, 1), (0, 0), marker='.',
-                                     color=GREY_DARK)
-        light_solid_line = plt.Line2D((0, 1), (0, 0), marker='.',
-                                      color=GREY_MID)
-        dark_dotted_line = plt.Line2D((0, 1), (0, 0), marker='.',
-                                      linestyle='dotted', color=GREY_DARK)
-        light_dotted_line = plt.Line2D((0, 1), (0, 0), marker='.',
-                                       linestyle='dotted', color=GREY_MID)
+        dark_solid_line = plt.Line2D((0, 1), (0, 0), marker=".", color=GREY_DARK)
+        light_solid_line = plt.Line2D((0, 1), (0, 0), marker=".", color=GREY_MID)
+        dark_dotted_line = plt.Line2D(
+            (0, 1), (0, 0), marker=".", linestyle="dotted", color=GREY_DARK
+        )
+        light_dotted_line = plt.Line2D(
+            (0, 1), (0, 0), marker=".", linestyle="dotted", color=GREY_MID
+        )
         fig.legend(
-            [
-                dark_solid_line,
-                light_solid_line,
-                dark_dotted_line,
-                light_dotted_line
-            ],
-            [
-                'Vulnerable Hosts',
-                'All Hosts',
-                'CH Vulnerable Hosts',
-                'CH All Hosts'
-            ],
-            'lower center', ncol=4, fontsize='x-small')
+            [dark_solid_line, light_solid_line, dark_dotted_line, light_dotted_line],
+            ["Vulnerable Hosts", "All Hosts", "CH Vulnerable Hosts", "CH All Hosts"],
+            "lower center",
+            ncol=4,
+            fontsize="x-small",
+        )

         # Fine-tune figure; make subplots close to each other
         plt.grid(False)
         # following line doesn't work with fig.set_tight_layout it
         # does work with plt.tight_layout(), but generates a warning
         fig.subplots_adjust(hspace=0)
-        plt.savefig(filename + '.pdf', bbox_inches='tight', pad_inches=0.25)
+        plt.savefig(f"{filename}.pdf", bbox_inches="tight", pad_inches=0.25)
         plt.close()


-class MyStackedLine(object):
-    def __init__(self, data_frame, yscale='linear', xlabel=None, ylabel=None,
-                 data_labels=None, data_fill_colors=None):
+class MyStackedLine:
+    """A stacked line chart."""
+
+    def __init__(
+        self,
+        data_frame,
+        yscale="linear",
+        xlabel=None,
+        ylabel=None,
+        data_labels=None,
+        data_fill_colors=None,
+    ):
+        """Initialize."""
         self.df = data_frame
         self.yscale = yscale
         self.xlabel = xlabel
@@ -647,47 +826,61 @@ def __init__(self, data_frame, yscale='linear', xlabel=None, ylabel=None,
         self.data_fill_colors = data_fill_colors

     def plot(self, filename, size=1.0):
+        """Create the graph."""
         # TODO Interpolate this data to get a nicer curve
         df = self.df
         fig, axes = plt.subplots(figsize=(8, 2.75))
         fig.set_size_inches(fig.get_size_inches() * size)
-        axes.stackplot(df.index, df['young'].values.astype(np.int),
-                       df['old'].values.astype(np.int),
-                       labels=self.data_labels,
-                       colors=self.data_fill_colors, alpha=0.2)
+        axes.stackplot(
+            df.index,
+            df["young"].values.astype(int),  # np.int is deprecated in NumPy
+            df["old"].values.astype(int),
+            labels=self.data_labels,
+            colors=self.data_fill_colors,
+            alpha=0.2,
+        )
         # Limit x-axis to 8 ticks; doesn't seem to work with Date data
         # :(
         # axes.locator_params(axis='x', nbins=8, tight=True)
         axes.yaxis.tick_left()  # ticks only on left
         axes.yaxis.grid(True)
         axes.xaxis.tick_bottom()  # ticks only on bottom
-        axes.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
+        axes.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d"))
         axes.set_axisbelow(True)
-        axes.spines['top'].set_visible(False)
-        axes.spines['right'].set_visible(False)
+        axes.spines["top"].set_visible(False)
+        axes.spines["right"].set_visible(False)
         if self.xlabel:
             plt.xlabel(self.xlabel)
         if self.ylabel:
             plt.ylabel(self.ylabel)
-        leg = plt.legend(fancybox=True, loc='lower center',
-                         ncol=2, prop={'size': 6}, bbox_to_anchor=(0.5, 1.0))
+        leg = plt.legend(
+            fancybox=True,
+            loc="lower center",
+            ncol=2,
+            prop={"size": 6},
+            bbox_to_anchor=(0.5, 1.0),
+        )
         # set the alpha value of the legend: it will be translucent
         leg.get_frame().set_alpha(0.5)

-        for i, tick in enumerate(axes.xaxis.get_major_ticks()):
+        for tick in axes.xaxis.get_major_ticks():
             tick.label.set_fontsize(6)

         fig.set_tight_layout(True)
-        plt.savefig(filename + '.pdf')
+        plt.savefig(f"{filename}.pdf")
         plt.close()


-class Boxes(object):
-    def __init__(self, dataframe, min_cols=25, other_color='green'):
+class Boxes:
+    """A box graph."""
+
+    def __init__(self, dataframe, min_cols=25, other_color="green"):
+        """Initialize."""
         self.df = dataframe
         self.min_cols = min_cols
         self.cols = None
         self.other_color = other_color

     def _calculate_cols(self, fig):
+        """Calculate the columns."""
         w, h = fig.get_size_inches()
         fig_area = w * h
         data_size = self.df.sum().sum()
@@ -696,6 +889,7 @@
         self.cols = max(self.min_cols, math.ceil(w / cell_size_in) + 1)

     def plot(self, filename, size=1.0):
+        """Create the graph."""
         fig = plt.figure(1)
         fig.set_size_inches(fig.get_size_inches() * size)
         self._calculate_cols(fig)
@@ -708,10 +902,10 @@ def plot(self, filename, size=1.0):
         ax = fig.add_subplot(1, 1, 1)
         ax.xaxis.set_visible(False)
         ax.yaxis.set_visible(False)
-        ax.spines['left'].set_visible(False)
-        ax.spines['top'].set_visible(False)
-        ax.spines['right'].set_visible(False)
-        ax.spines['bottom'].set_visible(False)
+        ax.spines["left"].set_visible(False)
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        ax.spines["bottom"].set_visible(False)

         boxes = []
         facecolors = []
@@ -725,11 +919,8 @@ def plot(self, filename, size=1.0):
                 color = color_iter.next()
             # iterate remainding values of row
             for tup_i, count in enumerate(tup):
-                for k in range(count):
-                    center = [
-                        i * width + (width / 2),
-                        1 - (j * height - (height / 2))
-                    ]
+                for _ in range(count):
+                    center = [i * width + (width / 2), 1 - (j * height - (height / 2))]
                     bottom_left = [i * width, 1 - (j * height)]
                     r = Rectangle(bottom_left, width, height)
                     boxes.append(r)
@@ -747,21 +938,24 @@ def plot(self, filename, size=1.0):
                     i = 0
                     j += 1

-        patches = PatchCollection(boxes, facecolors=facecolors,
-                                  edgecolors='white')
+        patches = PatchCollection(boxes, facecolors=facecolors, edgecolors="white")
         ax.add_collection(patches)

         fig.set_tight_layout(True)
-        plt.savefig(filename + '.pdf')
+        plt.savefig(f"{filename}.pdf")
         plt.close()
         return self.cols


-class Histogram(object):
+class Histogram:
+    """A histogram."""
+
     def __init__(self, bin_counts, highlight_bin):
+        """Initialize."""
         self.bin_counts = bin_counts
         self.highlight_bin = highlight_bin

     def plot(self, filename, size=1.0):
+        """Create the graph."""
         fig = plt.figure(1)
         fig.set_size_inches(fig.get_size_inches() * size)
@@ -770,11 +964,27 @@ def plot(self, filename, size=1.0):
         pos = np.arange(len(self.bin_counts))  # the bar centers on the x axis
         colors = [GREY_LIGHT] * len(self.bin_counts)
         highlight_colors = [
-            GREEN, GREEN, BLUE, BLUE, YELLOW, YELLOW, ORANGE, ORANGE, RED, RED
+            GREEN,
+            GREEN,
+            BLUE,
+            BLUE,
+            YELLOW,
+            YELLOW,
+            ORANGE,
+            ORANGE,
+            RED,
+            RED,
         ]
         colors[self.highlight_bin] = highlight_colors[self.highlight_bin]
-        plt.bar(pos, self.bin_counts, align='edge',
-                color=colors, edgecolor='white', linewidth=1, width=1)
+        plt.bar(
+            pos,
+            self.bin_counts,
+            align="edge",
+            color=colors,
+            edgecolor="white",
+            linewidth=1,
+            width=1,
+        )

         ax.yaxis.grid(False)
         # ax.spines['left'].set_visible(False)
@@ -783,10 +993,10 @@
         ax.yaxis.set_visible(False)
         # ax.xaxis.set_visible(False)
         ax.set_frame_on(False)
-        ax.tick_params(top='off', bottom='off')
+        ax.tick_params(top=False, bottom=False)  # Matplotlib 3 requires bools here
         ax.set_xlim(0, len(self.bin_counts))
         ax.xaxis.label.set_fontsize(18)
-        ax.set_xlabel('CVSS')
+        ax.set_xlabel("CVSS")
         tick_colors = [GREEN, BLUE, YELLOW, ORANGE, RED, RED]
         for i, tick in enumerate(ax.xaxis.get_major_ticks()):
             tick.label.set_fontsize(18)
@@ -799,13 +1009,17 @@ def plot(self, filename, size=1.0):
             # tick.label.set_visible(False)

         fig.set_tight_layout(True)
-        plt.savefig(filename + '.pdf')
+        plt.savefig(f"{filename}.pdf")
         plt.close()


-class Histogram2(object):
-    def __init__(self, histogram_data, bar_colors, tick_colors,
-                 x_label=None, y_label=None):
+class Histogram2:
+    """A histogram variant."""
+
+    def __init__(
+        self, histogram_data, bar_colors, tick_colors, x_label=None, y_label=None
+    ):
+        """Initialize."""
         self.histogram_data = histogram_data
         self.bar_colors = bar_colors
         self.tick_colors = tick_colors
@@ -813,21 +1027,28 @@ def __init__(self, histogram_data, bar_colors, tick_colors,
         self.y_label = y_label

     def plot(self, filename, size=1.0):
+        """Create the graph."""
         fig = plt.figure(figsize=(8, 2.5))
         fig.set_size_inches(fig.get_size_inches() * size)
         ax = fig.add_subplot(1, 1, 1)
-        plt.bar(self.histogram_data[1][:-1], self.histogram_data[0],
-                align='edge', color=self.bar_colors, edgecolor='white',
-                linewidth=1, width=0.5)
+        plt.bar(
+            self.histogram_data[1][:-1],
+            self.histogram_data[0],
+            align="edge",
+            color=self.bar_colors,
+            edgecolor="white",
+            linewidth=1,
+            width=0.5,
+        )
         # ax.set_frame_on(False)
         plt.xticks(self.histogram_data[1])  # Put a tick at edge of each bucket
-        ax.tick_params(top='off', bottom='off')
+        ax.tick_params(top=False, bottom=False)
         ax.yaxis.tick_left()  # ticks only on left
         ax.yaxis.grid(True)
-        ax.spines['top'].set_visible(False)
-        ax.spines['right'].set_visible(False)
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
         # ax.spines['bottom'].set_visible(False)
         ax.xaxis.label.set_fontsize(10)
         if self.x_label:
@@ -843,99 +1064,124 @@ def plot(self, filename, size=1.0):
             tick.label.set_color(self.tick_colors[i])

         fig.set_tight_layout(True)
-        plt.savefig(filename + '.pdf')
+        plt.savefig(f"{filename}.pdf")
         plt.close()


-class MyTrustyBar(object):
+class MyTrustyBar:
+    """Yet another bar chart."""
+
     def __init__(self, percentage_list, label_list, fill_color, title=None):
+        """Initialize."""
         self.title = title
         self.percentage_list = percentage_list
         self.label_list = label_list
         self.fill_color = fill_color

     def plot(self, filename):
+        """Create the graph."""
         # the x locations for the groups
         x_left_indices = np.arange(len(self.percentage_list))
-        width = 0.5  # the width of the bars: can also be len(x) sequence
-
-        p1 = plt.bar(x_left_indices, self.percentage_list, width,
-                     color=self.fill_color, edgecolor='none')
-        plt.bar(x_left_indices, [100-x for x in self.percentage_list],
-                width, color='w', bottom=self.percentage_list,
-                edgecolor='none')
+        width = 0.5  # the width of the bars: can also be len(x) sequence
+
+        p1 = plt.bar(
+            x_left_indices,
+            self.percentage_list,
+            width,
+            color=self.fill_color,
+            edgecolor="none",
+        )
+        plt.bar(
+            x_left_indices,
+            [100 - x for x in self.percentage_list],
+            width,
+            color="w",
+            bottom=self.percentage_list,
+            edgecolor="none",
+        )

-        plt.ylabel('Percent (%)', fontsize=14, style='italic')
+        plt.ylabel("Percent (%)", fontsize=14, style="italic")
         if self.title:
-            plt.title(self.title, fontsize=20, fontweight='bold', y=1.07)
-        plt.xticks(x_left_indices, self.label_list,
-                   fontsize=14, style='italic')
+            plt.title(self.title, fontsize=20, fontweight="bold", y=1.07)
+        plt.xticks(x_left_indices, self.label_list, fontsize=14, style="italic")
         plt.yticks(np.arange(10, 100, 10), fontsize=13)

         for bar in p1:
             height = bar.get_height()
-            plt.text(bar.get_x() + bar.get_width()/2.0, 1.0*height,
-                     '%d' % int(round(height, 0)) + '%', ha='center',
-                     va='bottom', fontsize=15)
+            plt.text(
+                bar.get_x() + bar.get_width() / 2.0,
+                1.0 * height,
+                f"{round(height)}%",
+                ha="center",
+                va="bottom",
+                fontsize=15,
+            )

         # plt.show()
-        plt.tight_layout()  # trims
margins down nicely - plt.savefig(filename + '.pdf') # bbox_inches=0, pad_inches=0 + plt.tight_layout(rect=[0, 0, 1, 0.98]) # trims margins down nicely + plt.savefig(f"{filename}.pdf") # bbox_inches=0, pad_inches=0 plt.close() -class MyDonutPie(object): +class MyDonutPie: + """A donut pie chart.""" + def __init__(self, percentage_full, label, fill_color): + """Initialize.""" self.percentage_full = percentage_full self.label = label self.fill_color = fill_color def plot(self, filename, size=1.0): + """Create the graph.""" # Override default figsize (make square), then scale by size parameter fig_width = fig_height = 4.0 * size - plt.rcParams.update({'figure.figsize': [fig_width, fig_height]}) + plt.rcParams.update({"figure.figsize": [fig_width, fig_height]}) # Minimize whitespace around chart - extent = mpl.transforms.Bbox(((0, 0), (fig_width, fig_height))) + extent = matplotlib.transforms.Bbox(((0, 0), (fig_width, fig_height))) - labels = '', '' + labels = "", "" sizes = [100 - self.percentage_full, self.percentage_full] - colors = ['white', self.fill_color] + colors = ["white", self.fill_color] # Set edge color to black # # See # https://matplotlib.org/users/dflt_style_changes.html#patch-edges-and-color - plt.rcParams['patch.force_edgecolor'] = True - plt.rcParams['patch.facecolor'] = 'b' + plt.rcParams["patch.force_edgecolor"] = True + plt.rcParams["patch.facecolor"] = "b" # autopct='%1.1f%%' - plt.pie(sizes, labels=labels, colors=colors, shadow=False, - startangle=90) + plt.pie(sizes, labels=labels, colors=colors, shadow=False, startangle=90) # Draw a circle at the center of pie to make it look like a donut - centre_circle = plt.Circle((0, 0), 0.75, color='black', fc='white', - linewidth=1.25) + centre_circle = plt.Circle( + (0, 0), 0.75, color="black", fc="white", linewidth=1.25 + ) fig = plt.gcf() fig.gca().add_artist(centre_circle) - plt.text(0, 0.15, str(self.percentage_full) + '%', - horizontalalignment='center', verticalalignment='center', - fontsize=50) - plt.text(0, -0.3, self.label, - horizontalalignment='center', - verticalalignment='center', - fontsize=19.5, fontweight='bold') + plt.text( + 0, + 0.15, + str(self.percentage_full) + "%", + horizontalalignment="center", + verticalalignment="center", + fontsize=50, + ) + plt.text( + 0, + -0.3, + self.label, + horizontalalignment="center", + verticalalignment="center", + fontsize=19.5, + fontweight="bold", + ) # Set aspect ratio to be equal so that pie is drawn as a circle. 
- plt.axis('equal') + plt.axis("equal") # plt.show() plt.tight_layout() # trims margins down nicely - plt.savefig(filename + '.pdf', bbox_inches=extent, pad_inches=0) + plt.savefig(f"{filename}.pdf", bbox_inches=extent, pad_inches=0) plt.close() - - -if __name__ == "__main__": - setup() - - m = MyMessage('Figure Omitted\nNo Vulnerabilities Detected') - m.plot('message') diff --git a/report/https_scan_report.mustache b/src/report/https_scan_report.mustache similarity index 100% rename from report/https_scan_report.mustache rename to src/report/https_scan_report.mustache diff --git a/src/report/pdf_capture.js b/src/report/pdf_capture.js new file mode 100644 index 0000000..22a6e21 --- /dev/null +++ b/src/report/pdf_capture.js @@ -0,0 +1,76 @@ +var system = require("system"); +var page = require("webpage").create(); +var url, outfile, vp_width, vp_height; + +page.onError = function (msg, trace) { + var msgStack = ["ERROR: " + msg]; + + if (trace && trace.length) { + msgStack.push("TRACE:"); + trace.forEach(function (t) { + msgStack.push( + " -> " + + t.file + + ": " + + t.line + + (t.function ? ' (in function "' + t.function + '")' : "") + ); + }); + } + console.error(msgStack.join("\n")); +}; + +page.onConsoleMessage = function (msg, lineNum, sourceId) { + console.log( + "CONSOLE: " + msg + " (from line #" + lineNum + ' in "' + sourceId + '")' + ); +}; + +if (system.args.length != 5) { + console.log("Usage: pdf_capture.js URL filename window_width window_height"); + phantom.exit(1); +} else { + url = system.args[1]; + outfile = system.args[2]; + vp_width = parseInt(system.args[3]); + vp_height = parseInt(system.args[4]); + page.viewportSize = { width: vp_width, height: vp_height }; + page.paperSize = { + width: vp_width + "px", + height: vp_height + 12 + "px", + margin: "0px", + }; // Add 12 pixels of height to keep PDF on 1 page + + function onPageReady() { + page.evaluate(function () { + return document.documentElement.outerHTML; + }); + //console.log(htmlContent); + page.render(outfile); + console.log("Wrote output file: " + outfile); + phantom.exit(); + } + + console.log("\nOpening page: " + url); + page.open(url, function (status) { + function checkReadyState() { + setTimeout(function () { + var readyState = page.evaluate(function () { + return ( + typeof cybex_chart4 !== "undefined" && + cybex_chart4.data().length > 0 + ); + }); + + if (readyState) { + setTimeout(function () { + onPageReady(); + }, 1000); // Wait for d3 transition to complete after data has been rec'd + } else { + checkReadyState(); + } + }, 200); + } + checkReadyState(); + }); +} diff --git a/src/secrets/database_creds.yml b/src/secrets/database_creds.yml new file mode 100644 index 0000000..1ce60fa --- /dev/null +++ b/src/secrets/database_creds.yml @@ -0,0 +1,6 @@ +--- +version: '1' + +database: + name: cyhy + uri: mongodb://readonly:the_password@cyhy.example.com:27017/cyhy diff --git a/src/version.txt b/src/version.txt new file mode 100644 index 0000000..c68196d --- /dev/null +++ b/src/version.txt @@ -0,0 +1 @@ +__version__ = "1.2.0" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5edfe20 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,45 @@ +"""pytest plugin configuration. 
+ +https://docs.pytest.org/en/latest/writing_plugins.html#conftest-py-plugins +""" +# Third-Party Libraries +import pytest + +MAIN_SERVICE_NAME = "pshtt_reporter" +VERSION_SERVICE_NAME = f"{MAIN_SERVICE_NAME}-version" + + +@pytest.fixture(scope="session") +def main_container(dockerc): + """Return the main container from the Docker composition.""" + # find the container by name even if it is stopped already + return dockerc.containers(service_names=[MAIN_SERVICE_NAME], stopped=True)[0] + + +# See #45 +# @pytest.fixture(scope="session") +# def version_container(dockerc): +# """Return the version container from the Docker composition. + +# The version container should just output the version of its underlying contents. +# """ +# # find the container by name even if it is stopped already +# return dockerc.containers(service_names=[VERSION_SERVICE_NAME], stopped=True)[0] + + +def pytest_addoption(parser): + """Add new commandline options to pytest.""" + parser.addoption( + "--runslow", action="store_true", default=False, help="run slow tests" + ) + + +def pytest_collection_modifyitems(config, items): + """Modify collected tests based on custom marks and commandline options.""" + if config.getoption("--runslow"): + # --runslow given in cli: do not skip slow tests + return + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) diff --git a/tests/container_test.py b/tests/container_test.py new file mode 100644 index 0000000..05d5d78 --- /dev/null +++ b/tests/container_test.py @@ -0,0 +1,95 @@ +#!/usr/bin/env pytest -vs +"""Tests for pshtt_reporter container.""" +# TODO: Make container tests functional +# See https://github.com/cisagov/pshtt_reporter/issues/45 + +# Standard Python Libraries +import os + +# import time + +# import pytest + +RELEASE_TAG = os.getenv("RELEASE_TAG") +VERSION_FILE = "src/version.txt" + + +def test_container_count(dockerc): + """Verify the test composition and container.""" + # stopped parameter allows non-running containers in results + assert ( + len(dockerc.containers(stopped=True)) == 2 + ), "Wrong number of containers were started." + + +# See #45 +# def test_wait_for_ready(main_container): +# """Wait for container to be ready.""" +# TIMEOUT = 10 +# for i in range(TIMEOUT): +# if READY_MESSAGE in main_container.logs().decode("utf-8"): +# break +# time.sleep(1) +# else: +# raise Exception( +# f"Container does not seem ready. " +# f'Expected "{READY_MESSAGE}" in the log within {TIMEOUT} seconds.' +# ) + + +# See #45 +# def test_wait_for_exits(main_container, version_container): +# """Wait for containers to exit.""" +# assert main_container.wait() == 0, "Container service (main) did not exit cleanly" +# assert ( +# version_container.wait() == 0 +# ), "Container service (version) did not exit cleanly" + + +# See #45 +# def test_output(main_container): +# """Verify the container had the correct output.""" +# main_container.wait() # make sure container exited if running test isolated +# log_output = main_container.logs().decode("utf-8") +# assert SECRET_QUOTE in log_output, "Secret not found in log output." 
+ + +# See #45 +# @pytest.mark.skipif( +# RELEASE_TAG in [None, ""], reason="this is not a release (RELEASE_TAG not set)" +# ) +# def test_release_version(): +# """Verify that release tag version agrees with the module version.""" +# pkg_vars = {} +# with open(VERSION_FILE) as f: +# exec(f.read(), pkg_vars) # nosec +# project_version = pkg_vars["__version__"] +# assert ( +# RELEASE_TAG == f"v{project_version}" +# ), "RELEASE_TAG does not match the project version" + + +# See #45 +# def test_log_version(version_container): +# """Verify the container outputs the correct version to the logs.""" +# version_container.wait() # make sure container exited if running test isolated +# log_output = version_container.logs().decode("utf-8").strip() +# pkg_vars = {} +# with open(VERSION_FILE) as f: +# exec(f.read(), pkg_vars) # nosec +# project_version = pkg_vars["__version__"] +# assert ( +# log_output == project_version +# ), f"Container version output to log does not match project version file {VERSION_FILE}" + + +# See #45 +# def test_container_version_label_matches(version_container): +# """Verify the container version label is the correct version.""" +# pkg_vars = {} +# with open(VERSION_FILE) as f: +# exec(f.read(), pkg_vars) # nosec +# project_version = pkg_vars["__version__"] +# assert ( +# version_container.labels["org.opencontainers.image.version"] == project_version +# ), "Dockerfile version label does not match project version" diff --git a/travis_scripts/build_docker_image.sh b/travis_scripts/build_docker_image.sh deleted file mode 100755 index 9b8b3b5..0000000 --- a/travis_scripts/build_docker_image.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -set -o nounset -set -o errexit -set -o pipefail - -# semver uses a plus character for the build number (if present). -# This is invalid for a Docker tag, so we replace it with a minus. -version=$(./bump_version.sh show|sed "s/+/-/") -docker build -t "$IMAGE_NAME":"$version" . diff --git a/travis_scripts/deploy_to_docker_hub.sh b/travis_scripts/deploy_to_docker_hub.sh deleted file mode 100755 index cb04d01..0000000 --- a/travis_scripts/deploy_to_docker_hub.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -set -o nounset -set -o errexit -set -o pipefail - -echo "$DOCKER_PW" | docker login -u "$DOCKER_USER" --password-stdin -# semver uses a plus character for the build number (if present). -# This is invalid for a Docker tag, so we replace it with a minus. -version=$(./bump_version.sh show|sed "s/+/-/") -docker push "$IMAGE_NAME":"$version" diff --git a/version.txt b/version.txt deleted file mode 100644 index e9bc149..0000000 --- a/version.txt +++ /dev/null @@ -1 +0,0 @@ -1.1.14