From 26553cee5d8e2cd4d721b479f57c189f927021e1 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Thu, 22 Feb 2024 11:22:07 +0800 Subject: [PATCH 001/100] Add Github Actions workflow --- .github/workflows/build.yml | 65 +++++++++++++++++++++++++++++++++++++ docker/Dockerfile | 24 ++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 .github/workflows/build.yml create mode 100644 docker/Dockerfile diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..eac99ca0 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,65 @@ +name: Build and Upload Artifact + +on: [push, workflow_dispatch] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Install Dependencies + run: | + sudo apt update + sudo apt install -y build-essential manpages-dev software-properties-common libcgal-dev libomp-dev libcairo2-dev nlohmann-json3-dev fftw3-dev + sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y + sudo apt update && sudo apt install -y gcc-11 g++-11 + + - name: Build Project + run: | + CORES=$(nproc) + sudo cmake -B build + sudo make -j${CORES} -C build + + #- name: Run Tests + # run: | + # sudo make install -C build + # cd tests/ + # chmod +x stress_test.sh + # bash stress_test.sh + + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + name: cartogram + path: ./build/bin/cartogram + + build-and-push-docker: + needs: build + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + - name: Download Artifact + uses: actions/download-artifact@v4 + with: + name: cartogram + path: ./artifact + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Set lowercased repo name + id: vars + run: echo "lowercase_repo=$(echo ${{ github.repository }} | tr '[:upper:]' 
'[:lower:]')" >> $GITHUB_ENV + - name: Build and Push Docker Image + uses: docker/build-push-action@v5 + with: + context: . + file: ./docker/Dockerfile + push: true + tags: ghcr.io/${{ env.lowercase_repo }}/cartogram:latest + build-args: CARTOGRAM_BIN=./artifact/cartogram diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 00000000..d7ddab12 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,24 @@ +FROM ubuntu:22.04 + +# Install dependencies +RUN apt-get update && apt-get install -y \ + python3.10 python3-pip \ + build-essential \ + manpages-dev \ + software-properties-common \ + libcgal-dev \ + libomp-dev \ + libcairo2-dev \ + nlohmann-json3-dev \ + fftw3-dev \ + && rm -rf /var/lib/apt/lists/* + +# Update alternatives to use python3 as the default python version +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \ + && update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 + +# Copy the compiled binary +ARG CARTOGRAM_BIN +COPY ${CARTOGRAM_BIN} /opt/cartogram + +CMD ["tail", "-f", "/dev/null"] \ No newline at end of file From 33dc2eb1749fb7e53ef0a778742ffaea3bc58edc Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Sat, 9 Mar 2024 15:49:07 +0800 Subject: [PATCH 002/100] Actions: Create new release on push to main Replaces Docker image creation --- .github/workflows/build.yml | 80 +++++++++++++++++++++++-------------- docker/Dockerfile | 24 ----------- 2 files changed, 50 insertions(+), 54 deletions(-) delete mode 100644 docker/Dockerfile diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index eac99ca0..49c21e86 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,13 +1,19 @@ name: Build and Upload Artifact -on: [push, workflow_dispatch] +on: + push: + branches: + - main + workflow_dispatch: jobs: - build: + build-and-release: runs-on: ubuntu-latest steps: - name: Checkout Repository uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Install 
Dependencies run: | @@ -34,32 +40,46 @@ jobs: with: name: cartogram path: ./build/bin/cartogram + + - name: Generate and Push Tag + id: generate_tag + run: | + # Configure git committer + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + # Fetch tags + git fetch --tags + + # Get the latest tag name, default to v1.0.0 if none exists + latest_tag=$(git describe --tags `git rev-list --tags --max-count=1` 2>/dev/null || echo "v1.0.0") + + # Extract the numeric components by stripping the 'v' prefix + version=${latest_tag#"v"} + # Split into array + IFS='.' read -ra version_parts <<< "$version" + + major=${version_parts[0]} + minor=${version_parts[1]} + patch=${version_parts[2]} + + # Increment the patch version + new_patch=$((patch + 1)) + new_tag="v${major}.${minor}.${new_patch}" + + # Create the new tag + git tag -a "${new_tag}" -m "New release ${new_tag}" + + # Push the tag to the repository + git push origin "${new_tag}" + + echo "new_tag=${new_tag}" >> $GITHUB_OUTPUT + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - build-and-push-docker: - needs: build - runs-on: ubuntu-latest - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - name: Download Artifact - uses: actions/download-artifact@v4 - with: - name: cartogram - path: ./artifact - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Set lowercased repo name - id: vars - run: echo "lowercase_repo=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV - - name: Build and Push Docker Image - uses: docker/build-push-action@v5 - with: - context: . 
- file: ./docker/Dockerfile - push: true - tags: ghcr.io/${{ env.lowercase_repo }}/cartogram:latest - build-args: CARTOGRAM_BIN=./artifact/cartogram + - name: Release + uses: softprops/action-gh-release@v2 + with: + tag_name: ${{ steps.generate_tag.outputs.new_tag }} + files: build/bin/cartogram + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index d7ddab12..00000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM ubuntu:22.04 - -# Install dependencies -RUN apt-get update && apt-get install -y \ - python3.10 python3-pip \ - build-essential \ - manpages-dev \ - software-properties-common \ - libcgal-dev \ - libomp-dev \ - libcairo2-dev \ - nlohmann-json3-dev \ - fftw3-dev \ - && rm -rf /var/lib/apt/lists/* - -# Update alternatives to use python3 as the default python version -RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \ - && update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 - -# Copy the compiled binary -ARG CARTOGRAM_BIN -COPY ${CARTOGRAM_BIN} /opt/cartogram - -CMD ["tail", "-f", "/dev/null"] \ No newline at end of file From a57e18a10ddfa0186d4fbc4bba400e6d9c21adc4 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Tue, 12 Mar 2024 15:56:17 +0800 Subject: [PATCH 003/100] Actions: Deploy binary to go-cart server --- .github/workflows/build.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 49c21e86..9b73f2ef 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -83,3 +83,15 @@ jobs: files: build/bin/cartogram env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + deploy: + needs: build-and-release + runs-on: ubuntu-latest + steps: + - name: Deploy binary to go-cart.io + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.DEPLOY_HOST }} + username: ${{ secrets.DEPLOY_USER }} + key: ${{ secrets.DEPLOY_SSH_KEY }} + script: 
/home/cartogram/deploy-cartogram-cpp.sh From f8ca81b1973d9fe65b9710e63f1e130761710ef0 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Tue, 12 Mar 2024 16:02:43 +0800 Subject: [PATCH 004/100] [skip ci] Disable deploy to go-cart --- .github/workflows/build.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9b73f2ef..a754ddca 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -84,14 +84,14 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - deploy: - needs: build-and-release - runs-on: ubuntu-latest - steps: - - name: Deploy binary to go-cart.io - uses: appleboy/ssh-action@v1.0.3 - with: - host: ${{ secrets.DEPLOY_HOST }} - username: ${{ secrets.DEPLOY_USER }} - key: ${{ secrets.DEPLOY_SSH_KEY }} - script: /home/cartogram/deploy-cartogram-cpp.sh + # deploy: + # needs: build-and-release + # runs-on: ubuntu-latest + # steps: + # - name: Deploy binary to go-cart.io + # uses: appleboy/ssh-action@v1.0.3 + # with: + # host: ${{ secrets.DEPLOY_HOST }} + # username: ${{ secrets.DEPLOY_USER }} + # key: ${{ secrets.DEPLOY_SSH_KEY }} + # script: /home/cartogram/deploy-cartogram-cpp.sh From 5fd9418ac84dd1e1ed3e8b4d9e733a68867216d9 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Fri, 22 Mar 2024 16:29:21 +0800 Subject: [PATCH 005/100] Enable deployment --- .github/workflows/build.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a754ddca..9b73f2ef 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -84,14 +84,14 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # deploy: - # needs: build-and-release - # runs-on: ubuntu-latest - # steps: - # - name: Deploy binary to go-cart.io - # uses: appleboy/ssh-action@v1.0.3 - # with: - # host: ${{ secrets.DEPLOY_HOST }} - # username: ${{ secrets.DEPLOY_USER }} - 
# key: ${{ secrets.DEPLOY_SSH_KEY }} - # script: /home/cartogram/deploy-cartogram-cpp.sh + deploy: + needs: build-and-release + runs-on: ubuntu-latest + steps: + - name: Deploy binary to go-cart.io + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.DEPLOY_HOST }} + username: ${{ secrets.DEPLOY_USER }} + key: ${{ secrets.DEPLOY_SSH_KEY }} + script: /home/cartogram/deploy-cartogram-cpp.sh From bc8ddb713d1ccdac55ffda16560ccea7f9425ad5 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Tue, 26 Mar 2024 12:15:38 +0800 Subject: [PATCH 006/100] [skip ci] Actions: Update dependencies to compile binary --- .github/workflows/build.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9b73f2ef..7a2c5e51 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,9 +18,9 @@ jobs: - name: Install Dependencies run: | sudo apt update - sudo apt install -y build-essential manpages-dev software-properties-common libcgal-dev libomp-dev libcairo2-dev nlohmann-json3-dev fftw3-dev + sudo apt install -y build-essential manpages-dev software-properties-common nlohmann-json3-dev libcgal-dev libomp-dev libfftw3-dev libcairo2-dev libmatplot++-dev libboost-all-dev cmake sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - sudo apt update && sudo apt install -y gcc-11 g++-11 + sudo apt update && sudo apt install -y gcc-13 g++-13 - name: Build Project run: | @@ -84,14 +84,14 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - deploy: - needs: build-and-release - runs-on: ubuntu-latest - steps: - - name: Deploy binary to go-cart.io - uses: appleboy/ssh-action@v1.0.3 - with: - host: ${{ secrets.DEPLOY_HOST }} - username: ${{ secrets.DEPLOY_USER }} - key: ${{ secrets.DEPLOY_SSH_KEY }} - script: /home/cartogram/deploy-cartogram-cpp.sh + # deploy: + # needs: build-and-release + # runs-on: ubuntu-latest + # steps: + # - name: Deploy binary to 
go-cart.io + # uses: appleboy/ssh-action@v1.0.3 + # with: + # host: ${{ secrets.DEPLOY_HOST }} + # username: ${{ secrets.DEPLOY_USER }} + # key: ${{ secrets.DEPLOY_SSH_KEY }} + # script: /home/cartogram/deploy-cartogram-cpp.sh From 926127e2d3ec19fae5f0df67b7e96bc5c48eb5a0 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Thu, 4 Apr 2024 00:20:35 +0800 Subject: [PATCH 007/100] Actions: Test macos build --- .github/workflows/build.yml | 6 +++--- .github/workflows/macos-build.yml | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/macos-build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7a2c5e51..1b75e4ba 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,9 +1,9 @@ name: Build and Upload Artifact on: - push: - branches: - - main + #push: + #branches: + #- main workflow_dispatch: jobs: diff --git a/.github/workflows/macos-build.yml b/.github/workflows/macos-build.yml new file mode 100644 index 00000000..7874d61b --- /dev/null +++ b/.github/workflows/macos-build.yml @@ -0,0 +1,25 @@ +name: Build and Upload Artifact (macOS) + +on: [push, workflow_dispatch] + +jobs: + build-macos: + runs-on: macos-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Install Dependencies with Homebrew + run: | + brew install llvm@17 libomp pkg-config boost fftw cgal nlohmann-json cmake cairo matplotplusplus + + - name: Build Project + run: | + cmake -B build + make -C build + + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + name: cartogram + path: ./build/bin/cartogram \ No newline at end of file From 4c2586afcf452c0508b5b66be74b9fc10266dae8 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Tue, 23 Apr 2024 17:17:08 +0800 Subject: [PATCH 008/100] Fix compilation errors on Ubuntu * Removed matplot++ dependency * Downgraded cmake, gcc, g++ versions to be compatible with Ubuntu 22.04 out of the box * Changed 
-isystem flags to -I as it was affecting how gcc was searching for header files --- CMakeLists.txt | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d9952932..57758794 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.27) +cmake_minimum_required(VERSION 3.1) project(cartogram LANGUAGES CXX) # ========== Project Setup ========== @@ -16,13 +16,10 @@ endif() # Boost find_package(Boost REQUIRED COMPONENTS unit_test_framework) -# Matplot++ -find_package(Matplot++ REQUIRED) - # PkgConfig, fftw, and cairo find_package(PkgConfig REQUIRED) pkg_search_module(fftw REQUIRED fftw3 IMPORTED_TARGET) -pkg_search_module(CAIRO REQUIRED CAIRO IMPORTED_TARGET) +pkg_search_module(cairo REQUIRED cairo IMPORTED_TARGET) # ========== Compiler Setup ========== if(APPLE) @@ -35,8 +32,8 @@ if(APPLE) set(CMAKE_CXX_COMPILER "${LLVM_BASE_PATH}clang++") set(CMAKE_C_COMPILER "${LLVM_BASE_PATH}clang") elseif(UNIX) - set(CMAKE_CXX_COMPILER "g++-13") - set(CMAKE_C_COMPILER "gcc-13") + set(CMAKE_CXX_COMPILER "g++-11") + set(CMAKE_C_COMPILER "gcc-11") endif() # ========== Source Files ========== @@ -48,12 +45,12 @@ target_include_directories(cartogram PUBLIC ${PROJECT_SOURCE_DIR}/include ${Boost_INCLUDE_DIRS} PkgConfig::fftw - PkgConfig::CAIRO + PkgConfig::cairo ) # ========== Compile Options ========== if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(cartogram PRIVATE -isystem ${Boost_INCLUDE_DIRS}) + target_compile_options(cartogram PRIVATE -I ${Boost_INCLUDE_DIRS}) target_compile_options(cartogram PRIVATE -ffp-contract=off) elseif(MSVC) target_compile_options(cartogram PRIVATE /external:I ${Boost_INCLUDE_DIRS}) @@ -65,8 +62,7 @@ target_compile_options(cartogram PRIVATE -Wall -Wextra -pedantic -Wno-deprecated # ========== Linking Libraries ========== target_link_libraries(cartogram PkgConfig::fftw - PkgConfig::CAIRO - 
Matplot++::matplot + PkgConfig::cairo ) # ========== Installation ========== @@ -101,7 +97,7 @@ foreach(TEST_FILE ${TEST_FILES}) ) if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(${TEST_NAME} PRIVATE -isystem ${Boost_INCLUDE_DIRS}) + target_compile_options(${TEST_NAME} PRIVATE -I ${Boost_INCLUDE_DIRS}) target_compile_options(${TEST_NAME} PRIVATE -ffp-contract=off) elseif(MSVC) target_compile_options(${TEST_NAME} PRIVATE /external:I ${Boost_INCLUDE_DIRS}) @@ -127,4 +123,4 @@ add_custom_command( POST_BUILD COMMENT "Uninstalling cartogram..." COMMAND xargs rm -vf < install_manifest.txt || echo "Nothing in install_manifest.txt to be uninstalled!" -) +) \ No newline at end of file From d6aef50a7e56a047b24087493f6348adc069cf37 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Tue, 23 Apr 2024 17:20:11 +0800 Subject: [PATCH 009/100] actions: Remove macos build workflow --- .github/workflows/macos-build.yml | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 .github/workflows/macos-build.yml diff --git a/.github/workflows/macos-build.yml b/.github/workflows/macos-build.yml deleted file mode 100644 index 7874d61b..00000000 --- a/.github/workflows/macos-build.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Build and Upload Artifact (macOS) - -on: [push, workflow_dispatch] - -jobs: - build-macos: - runs-on: macos-latest - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Install Dependencies with Homebrew - run: | - brew install llvm@17 libomp pkg-config boost fftw cgal nlohmann-json cmake cairo matplotplusplus - - - name: Build Project - run: | - cmake -B build - make -C build - - - name: Upload Artifact - uses: actions/upload-artifact@v4 - with: - name: cartogram - path: ./build/bin/cartogram \ No newline at end of file From 7acc9ca10ae2d274503657ba860a8825b6c4b9a4 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Tue, 23 Apr 2024 17:21:48 +0800 Subject: [PATCH 010/100] actions: 
Update dependencies and disable auto releases For testing of new cartogram binary --- .github/workflows/build.yml | 72 ++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1b75e4ba..7640682c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,9 +18,9 @@ jobs: - name: Install Dependencies run: | sudo apt update - sudo apt install -y build-essential manpages-dev software-properties-common nlohmann-json3-dev libcgal-dev libomp-dev libfftw3-dev libcairo2-dev libmatplot++-dev libboost-all-dev cmake + sudo apt install -y build-essential manpages-dev software-properties-common nlohmann-json3-dev libcgal-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - sudo apt update && sudo apt install -y gcc-13 g++-13 + sudo apt update && sudo apt install -y gcc-11 g++-11 - name: Build Project run: | @@ -41,48 +41,48 @@ jobs: name: cartogram path: ./build/bin/cartogram - - name: Generate and Push Tag - id: generate_tag - run: | - # Configure git committer - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - # Fetch tags - git fetch --tags + # - name: Generate and Push Tag + # id: generate_tag + # run: | + # # Configure git committer + # git config --local user.email "action@github.com" + # git config --local user.name "GitHub Action" + # # Fetch tags + # git fetch --tags - # Get the latest tag name, default to v1.0.0 if none exists - latest_tag=$(git describe --tags `git rev-list --tags --max-count=1` 2>/dev/null || echo "v1.0.0") + # # Get the latest tag name, default to v1.0.0 if none exists + # latest_tag=$(git describe --tags `git rev-list --tags --max-count=1` 2>/dev/null || echo "v1.0.0") - # Extract the numeric components by stripping the 'v' prefix - version=${latest_tag#"v"} - # Split into array - IFS='.' 
read -ra version_parts <<< "$version" + # # Extract the numeric components by stripping the 'v' prefix + # version=${latest_tag#"v"} + # # Split into array + # IFS='.' read -ra version_parts <<< "$version" - major=${version_parts[0]} - minor=${version_parts[1]} - patch=${version_parts[2]} + # major=${version_parts[0]} + # minor=${version_parts[1]} + # patch=${version_parts[2]} - # Increment the patch version - new_patch=$((patch + 1)) - new_tag="v${major}.${minor}.${new_patch}" + # # Increment the patch version + # new_patch=$((patch + 1)) + # new_tag="v${major}.${minor}.${new_patch}" - # Create the new tag - git tag -a "${new_tag}" -m "New release ${new_tag}" + # # Create the new tag + # git tag -a "${new_tag}" -m "New release ${new_tag}" - # Push the tag to the repository - git push origin "${new_tag}" + # # Push the tag to the repository + # git push origin "${new_tag}" - echo "new_tag=${new_tag}" >> $GITHUB_OUTPUT - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # echo "new_tag=${new_tag}" >> $GITHUB_OUTPUT + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Release - uses: softprops/action-gh-release@v2 - with: - tag_name: ${{ steps.generate_tag.outputs.new_tag }} - files: build/bin/cartogram - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # - name: Release + # uses: softprops/action-gh-release@v2 + # with: + # tag_name: ${{ steps.generate_tag.outputs.new_tag }} + # files: build/bin/cartogram + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # deploy: # needs: build-and-release From f735abc6b41542513e46c9d6bc21f84c853c4534 Mon Sep 17 00:00:00 2001 From: Carlsen Teo Date: Tue, 23 Apr 2024 18:28:15 +0800 Subject: [PATCH 011/100] actions: Re-enable release, change versioning system --- .github/workflows/build.yml | 78 ++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7640682c..6152d4a6 100644 --- 
a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,9 +1,9 @@ name: Build and Upload Artifact on: - #push: - #branches: - #- main + push: + branches: + - main workflow_dispatch: jobs: @@ -41,48 +41,44 @@ jobs: name: cartogram path: ./build/bin/cartogram - # - name: Generate and Push Tag - # id: generate_tag - # run: | - # # Configure git committer - # git config --local user.email "action@github.com" - # git config --local user.name "GitHub Action" - # # Fetch tags - # git fetch --tags - - # # Get the latest tag name, default to v1.0.0 if none exists - # latest_tag=$(git describe --tags `git rev-list --tags --max-count=1` 2>/dev/null || echo "v1.0.0") - - # # Extract the numeric components by stripping the 'v' prefix - # version=${latest_tag#"v"} - # # Split into array - # IFS='.' read -ra version_parts <<< "$version" - - # major=${version_parts[0]} - # minor=${version_parts[1]} - # patch=${version_parts[2]} - - # # Increment the patch version - # new_patch=$((patch + 1)) - # new_tag="v${major}.${minor}.${new_patch}" + - name: Generate and Push Tag + id: generate_tag + run: | + # Configure git committer + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + # Fetch tags + git fetch --tags - # # Create the new tag - # git tag -a "${new_tag}" -m "New release ${new_tag}" + # Get the latest tag name, default to Release 0 if none exists + latest_tag=$(git describe --tags `git rev-list --tags --max-count=1` 2>/dev/null || echo "Release-0") - # # Push the tag to the repository - # git push origin "${new_tag}" + # Extract the release number by stripping the 'Release ' prefix + release_number=${latest_tag#"Release-"} + + # Increment the release number + new_release_number=$((release_number + 1)) + new_tag="Release-${new_release_number}" + + # Create the new tag + git tag -a "${new_tag}" -m "New release ${new_tag}" + + # Push the tag to the repository + git push origin "${new_tag}" - # echo "new_tag=${new_tag}" 
>> $GITHUB_OUTPUT - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + echo "release_number=${new_release_number}" >> $GITHUB_OUTPUT + echo "new_tag=${new_tag}" >> $GITHUB_OUTPUT + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # - name: Release - # uses: softprops/action-gh-release@v2 - # with: - # tag_name: ${{ steps.generate_tag.outputs.new_tag }} - # files: build/bin/cartogram - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Release + uses: softprops/action-gh-release@v2 + with: + tag_name: ${{ steps.generate_tag.outputs.new_tag }} + files: build/bin/cartogram + body: "Rolling release ${{ steps.generate_tag.outputs.release_number }}" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # deploy: # needs: build-and-release From 6826d465d89b7994e14f9052d85bf463b5668342 Mon Sep 17 00:00:00 2001 From: Carlsen Teo <7454833+Wind1337@users.noreply.github.com> Date: Sun, 16 Jun 2024 13:37:45 +0800 Subject: [PATCH 012/100] [skip ci] Enable binary deployment --- .github/workflows/build.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6152d4a6..70a7e608 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -80,14 +80,14 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # deploy: - # needs: build-and-release - # runs-on: ubuntu-latest - # steps: - # - name: Deploy binary to go-cart.io - # uses: appleboy/ssh-action@v1.0.3 - # with: - # host: ${{ secrets.DEPLOY_HOST }} - # username: ${{ secrets.DEPLOY_USER }} - # key: ${{ secrets.DEPLOY_SSH_KEY }} - # script: /home/cartogram/deploy-cartogram-cpp.sh + deploy: + needs: build-and-release + runs-on: ubuntu-latest + steps: + - name: Deploy binary to go-cart.io + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.DEPLOY_HOST }} + username: ${{ secrets.DEPLOY_USER }} + key: ${{ secrets.DEPLOY_SSH_KEY }} + script: /home/cartogram/deploy-cartogram-cpp.sh From 
72530da53855ecde2bc3abf14d3ce216762914af Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:29:16 +0800 Subject: [PATCH 013/100] Increase `cmake_minimum_required` version --- CMakeLists.txt | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 97b0ef40..a150bfbd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,10 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) + +if(UNIX AND NOT APPLE) + set(CMAKE_CXX_COMPILER "g++-11") + set(CMAKE_C_COMPILER "gcc-11") +endif() + project(cartogram LANGUAGES CXX) # ========== Project Setup ========== @@ -22,6 +28,11 @@ find_package(PkgConfig REQUIRED) pkg_search_module(fftw REQUIRED fftw3 IMPORTED_TARGET) pkg_search_module(cairo REQUIRED cairo IMPORTED_TARGET) +# OpenMP: allows parallelisation +# REQUIRED argument purposely not passed. +# We can test if OpenMP is available by with OpenMP_FOUND +find_package(OpenMP) + # ========== Source Files ========== file(GLOB_RECURSE CARTOGRAM_SOURCES "src/*.cpp") add_executable(cartogram ${CARTOGRAM_SOURCES}) From 80eda89d8d4204b6fed809afed9f4625f58e7886 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:38:05 +0800 Subject: [PATCH 014/100] Make `CMakeLists.txt` compatible with macOS --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a150bfbd..86859a43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.27) if(UNIX AND NOT APPLE) set(CMAKE_CXX_COMPILER "g++-11") @@ -47,7 +47,7 @@ target_include_directories(cartogram PUBLIC # ========== Compile Options ========== if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(cartogram PRIVATE -I ${Boost_INCLUDE_DIRS}) + 
target_compile_options(cartogram PRIVATE -isystem ${Boost_INCLUDE_DIRS}) target_compile_options(cartogram PRIVATE -ffp-contract=off) elseif(MSVC) target_compile_options(cartogram PRIVATE /external:I ${Boost_INCLUDE_DIRS}) @@ -94,7 +94,7 @@ foreach(TEST_FILE ${TEST_FILES}) ) if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(${TEST_NAME} PRIVATE -I ${Boost_INCLUDE_DIRS}) + target_compile_options(${TEST_NAME} PRIVATE -isystem ${Boost_INCLUDE_DIRS}) target_compile_options(${TEST_NAME} PRIVATE -ffp-contract=off) elseif(MSVC) target_compile_options(${TEST_NAME} PRIVATE /external:I ${Boost_INCLUDE_DIRS}) From 234b06c4e3b419dcb04abd3f2a1066f236f2938d Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:52:12 +0800 Subject: [PATCH 015/100] Make `CMakeLists.txt` as similar to `main` as possible --- CMakeLists.txt | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 86859a43..8f123b0b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,5 @@ cmake_minimum_required(VERSION 3.27) -if(UNIX AND NOT APPLE) - set(CMAKE_CXX_COMPILER "g++-11") - set(CMAKE_C_COMPILER "gcc-11") -endif() - project(cartogram LANGUAGES CXX) # ========== Project Setup ========== @@ -23,15 +18,13 @@ endif() # Boost find_package(Boost REQUIRED COMPONENTS unit_test_framework) +# Matplot++ +find_package(Matplot++ REQUIRED) + # PkgConfig, fftw, and cairo find_package(PkgConfig REQUIRED) pkg_search_module(fftw REQUIRED fftw3 IMPORTED_TARGET) -pkg_search_module(cairo REQUIRED cairo IMPORTED_TARGET) - -# OpenMP: allows parallelisation -# REQUIRED argument purposely not passed. 
-# We can test if OpenMP is available by with OpenMP_FOUND -find_package(OpenMP) +pkg_search_module(CAIRO REQUIRED CAIRO IMPORTED_TARGET) # ========== Source Files ========== file(GLOB_RECURSE CARTOGRAM_SOURCES "src/*.cpp") @@ -42,7 +35,7 @@ target_include_directories(cartogram PUBLIC ${PROJECT_SOURCE_DIR}/include ${Boost_INCLUDE_DIRS} PkgConfig::fftw - PkgConfig::cairo + PkgConfig::CAIRO ) # ========== Compile Options ========== @@ -59,7 +52,7 @@ target_compile_options(cartogram PRIVATE -Wall -Wextra -pedantic -Wno-deprecated # ========== Linking Libraries ========== target_link_libraries(cartogram PkgConfig::fftw - PkgConfig::cairo + PkgConfig::CAIRO ) # ========== Installation ========== From 419a919498152096777f1985bdd22fdd91789a52 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:54:06 +0800 Subject: [PATCH 016/100] Add the `mole`: one line causing the error We need to figure out why removing this line makes it not possible to compile on macOS anymore. 
--- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f123b0b..24b43e36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,7 @@ target_compile_options(cartogram PRIVATE -Wall -Wextra -pedantic -Wno-deprecated target_link_libraries(cartogram PkgConfig::fftw PkgConfig::CAIRO + Matplot++::matplot ) # ========== Installation ========== From 19688afcebb15d4ac026faf64a03b1947a2f8fe7 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:07:23 +0800 Subject: [PATCH 017/100] Make matplot++ conditional (temporary fix) --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 24b43e36..069d31b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.27) +cmake_minimum_required(VERSION 3.6) project(cartogram LANGUAGES CXX) @@ -19,7 +19,7 @@ endif() find_package(Boost REQUIRED COMPONENTS unit_test_framework) # Matplot++ -find_package(Matplot++ REQUIRED) +find_package(Matplot++) # PkgConfig, fftw, and cairo find_package(PkgConfig REQUIRED) @@ -53,7 +53,7 @@ target_compile_options(cartogram PRIVATE -Wall -Wextra -pedantic -Wno-deprecated target_link_libraries(cartogram PkgConfig::fftw PkgConfig::CAIRO - Matplot++::matplot + $<$:Matplot++::matplot> ) # ========== Installation ========== From d003e8e27b8832c459f0e5b0020d0fbbf6050109 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:36:46 +0800 Subject: [PATCH 018/100] Separate build into configure and build step --- .github/workflows/build.yml | 86 ++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 34 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 70a7e608..56ddf073 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,14 +1,19 @@ name: Build and 
Upload Artifact on: + # either when a push is made to the main branch or when a pull request is merged push: - branches: - - main + branches: [ "main" ] workflow_dispatch: +env: + BUILD_TYPE: Release + jobs: build-and-release: + runs-on: ubuntu-latest + steps: - name: Checkout Repository uses: actions/checkout@v4 @@ -22,12 +27,26 @@ jobs: sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y sudo apt update && sudo apt install -y gcc-11 g++-11 - - name: Build Project + + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Build run: | CORES=$(nproc) + cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} ---target install -j${CORES} -- + + # - name: Test + # working-directory: ${{github.workspace}}/build + # # Execute tests defined by the CMake configuration. + # # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + # run: ctest -C ${{env.BUILD_TYPE}} + + - name: Build Project + run: | sudo cmake -B build sudo make -j${CORES} -C build - + #- name: Run Tests # run: | # sudo make install -C build @@ -40,36 +59,35 @@ jobs: with: name: cartogram path: ./build/bin/cartogram - - - name: Generate and Push Tag - id: generate_tag - run: | - # Configure git committer - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - # Fetch tags - git fetch --tags - - # Get the latest tag name, default to Release 0 if none exists - latest_tag=$(git describe --tags `git rev-list --tags --max-count=1` 2>/dev/null || echo "Release-0") - - # Extract the release number by stripping the 'Release ' prefix - release_number=${latest_tag#"Release-"} - - # Increment the release number - new_release_number=$((release_number + 1)) - new_tag="Release-${new_release_number}" - - # Create the new tag - git tag -a "${new_tag}" -m "New release ${new_tag}" - - # Push the tag to the repository - git push origin "${new_tag}" - - echo 
"release_number=${new_release_number}" >> $GITHUB_OUTPUT - echo "new_tag=${new_tag}" >> $GITHUB_OUTPUT - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Generate and Push Tag + id: generate_tag + run: | + # Configure git committer + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + + # Fetch tags + git fetch --tags + + # Get the current date + current_date=$(date +'%Y-%m-%d') + + # Get the short SHA of the current HEAD + short_sha=$(git rev-parse --short HEAD) + + # Create the new tag + new_tag="${current_date}-${short_sha}" + + # Create the new tag + git tag -a "${new_tag}" -m "New release ${new_tag}" + + # Push the tag to the repository + git push origin "${new_tag}" + + echo "new_tag=${new_tag}" >> $GITHUB_OUTPUT + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Release uses: softprops/action-gh-release@v2 From 46289c0cb905c4903d04826ea0ed77f9736f2a65 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:44:34 +0800 Subject: [PATCH 019/100] Remove CORES variable, add -j$(nproc) --- .github/workflows/build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 56ddf073..878e2540 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,8 +33,7 @@ jobs: - name: Build run: | - CORES=$(nproc) - cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} ---target install -j${CORES} -- + cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} ---target install -j$(nproc) -- # - name: Test # working-directory: ${{github.workspace}}/build From 5535336f6a62072ed5be3e3243846b0f20478af8 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:48:37 +0800 Subject: [PATCH 020/100] Add "pr/adisidev/201" branch to run --- .github/workflows/build.yml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 878e2540..32e99695 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,7 +3,7 @@ name: Build and Upload Artifact on: # either when a push is made to the main branch or when a pull request is merged push: - branches: [ "main" ] + branches: [ "main", "pr/adisidev/201" ] workflow_dispatch: env: From b40bd79a917cedda9824f809d7411fc15d0b5dec Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:55:51 +0800 Subject: [PATCH 021/100] Make cairo lowercase, so it works on ubuntu --- .github/workflows/build.yml | 2 +- CMakeLists.txt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 32e99695..e3815061 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,9 +2,9 @@ name: Build and Upload Artifact on: # either when a push is made to the main branch or when a pull request is merged + workflow_dispatch: push: branches: [ "main", "pr/adisidev/201" ] - workflow_dispatch: env: BUILD_TYPE: Release diff --git a/CMakeLists.txt b/CMakeLists.txt index 069d31b0..059cb88d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,7 @@ find_package(Matplot++) # PkgConfig, fftw, and cairo find_package(PkgConfig REQUIRED) pkg_search_module(fftw REQUIRED fftw3 IMPORTED_TARGET) -pkg_search_module(CAIRO REQUIRED CAIRO IMPORTED_TARGET) +pkg_search_module(cairo REQUIRED cairo IMPORTED_TARGET) # ========== Source Files ========== file(GLOB_RECURSE CARTOGRAM_SOURCES "src/*.cpp") @@ -35,7 +35,7 @@ target_include_directories(cartogram PUBLIC ${PROJECT_SOURCE_DIR}/include ${Boost_INCLUDE_DIRS} PkgConfig::fftw - PkgConfig::CAIRO + PkgConfig::cairo ) # ========== Compile Options ========== @@ -52,7 +52,7 @@ target_compile_options(cartogram PRIVATE -Wall -Wextra -pedantic -Wno-deprecated # 
========== Linking Libraries ========== target_link_libraries(cartogram PkgConfig::fftw - PkgConfig::CAIRO + PkgConfig::cairo $<$:Matplot++::matplot> ) From ee5c482f111545f4c173aedc1aee982be1630e0e Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:03:34 +0800 Subject: [PATCH 022/100] Cache apt packages --- .github/workflows/build.yml | 13 +++++++++++++ apt-requirements.txt | 10 ++++++++++ 2 files changed, 23 insertions(+) create mode 100644 apt-requirements.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e3815061..7cfefd84 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,6 +20,14 @@ jobs: with: fetch-depth: 0 + - name: Cache APT packages + uses: actions/cache@v3 + with: + path: /var/cache/apt + key: ${{ runner.os }}-apt-${{ hashFiles('apt-dependencies.txt') }} + restore-keys: | + ${{ runner.os }}-apt- + - name: Install Dependencies run: | sudo apt update @@ -27,6 +35,11 @@ jobs: sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y sudo apt update && sudo apt install -y gcc-11 g++-11 + - name: Save APT cache + uses: actions/cache@v3 + with: + path: /var/cache/apt + key: ${{ runner.os }}-apt-${{ hashFiles('apt-dependencies.txt') }} - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} diff --git a/apt-requirements.txt b/apt-requirements.txt new file mode 100644 index 00000000..ee202b4f --- /dev/null +++ b/apt-requirements.txt @@ -0,0 +1,10 @@ +build-essential +manpages-dev +software-properties-common +nlohmann-json3-dev +libcgal-dev +libomp-dev +libfftw3-dev +libcairo2-dev +libboost-all-dev +cmake \ No newline at end of file From 88cea227a4b7c5404634ba4d07850e1648f27ea3 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:05:32 +0800 Subject: [PATCH 023/100] Fix cmake build command --- .github/workflows/build.yml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7cfefd84..99a63d28 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -46,7 +46,7 @@ jobs: - name: Build run: | - cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} ---target install -j$(nproc) -- + cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --target install -j$(nproc) -- # - name: Test # working-directory: ${{github.workspace}}/build From 6ccd77c58e8add1068c1bece954876bcbd8854cb Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:17:45 +0800 Subject: [PATCH 024/100] Make CMakeLists.txt cross-compatible (ubuntu / macos) --- CMakeLists.txt | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 059cb88d..f6712939 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,10 @@ cmake_minimum_required(VERSION 3.6) +if(UNIX AND NOT APPLE) + set(CMAKE_CXX_COMPILER "g++-11") + set(CMAKE_C_COMPILER "gcc-11") +endif() + project(cartogram LANGUAGES CXX) # ========== Project Setup ========== @@ -39,9 +44,12 @@ target_include_directories(cartogram PUBLIC ) # ========== Compile Options ========== -if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") target_compile_options(cartogram PRIVATE -isystem ${Boost_INCLUDE_DIRS}) target_compile_options(cartogram PRIVATE -ffp-contract=off) +elseif(UNIX AND NOT APPLE) + target_compile_options(cartogram PRIVATE -I ${Boost_INCLUDE_DIRS}) + target_compile_options(cartogram PRIVATE -ffp-contract=off) elseif(MSVC) target_compile_options(cartogram PRIVATE /external:I ${Boost_INCLUDE_DIRS}) endif() @@ -87,9 +95,12 @@ foreach(TEST_FILE ${TEST_FILES}) PkgConfig::fftw ) - if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES 
"Clang") target_compile_options(${TEST_NAME} PRIVATE -isystem ${Boost_INCLUDE_DIRS}) target_compile_options(${TEST_NAME} PRIVATE -ffp-contract=off) + elseif(UNIX AND NOT APPLE) + target_compile_options(cartogram PRIVATE -I ${Boost_INCLUDE_DIRS}) + target_compile_options(cartogram PRIVATE -ffp-contract=off) elseif(MSVC) target_compile_options(${TEST_NAME} PRIVATE /external:I ${Boost_INCLUDE_DIRS}) endif() From 853320d21d09822ceeff25625eff446d9e0d0d4c Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:51:44 +0800 Subject: [PATCH 025/100] Update build.yml --- .github/workflows/build.yml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 99a63d28..9f9697f0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -59,12 +59,12 @@ jobs: sudo cmake -B build sudo make -j${CORES} -C build - #- name: Run Tests - # run: | - # sudo make install -C build - # cd tests/ - # chmod +x stress_test.sh - # bash stress_test.sh + - name: Run Tests + run: | + sudo make install -C build + cd tests/ + chmod +x stress_test.sh + bash stress_test.sh - name: Upload Artifact uses: actions/upload-artifact@v4 @@ -89,10 +89,7 @@ jobs: short_sha=$(git rev-parse --short HEAD) # Create the new tag - new_tag="${current_date}-${short_sha}" - - # Create the new tag - git tag -a "${new_tag}" -m "New release ${new_tag}" + git tag -a "${short_sha}" -m "New release on ${current_date}" # Push the tag to the repository git push origin "${new_tag}" From 513a99153cde87ca2a0a21338d72225d542a4f1c Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 9 Oct 2024 15:53:20 +0800 Subject: [PATCH 026/100] Write _input.geojson after storing original geodivs --- src/main.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index eba784ed..205da616 
100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -185,18 +185,18 @@ int main(const int argc, const char *argv[]) // Rescale map to fit into a rectangular box [0, lx] * [0, ly] inset_state.rescale_map(long_grid_side_length, cart_info.is_world_map()); - // Output rescaled GeoJSON - cart_info.write_geojson( - geo_file_name, - map_name + "_input.geojson", - output_to_stdout); - if (output_to_stdout) { // Store original coordinates inset_state.store_original_geo_divs(); } + // Output rescaled GeoJSON + cart_info.write_geojson( + geo_file_name, + map_name + "_input.geojson", + output_to_stdout); + // Set up Fourier transforms const unsigned int lx = inset_state.lx(); const unsigned int ly = inset_state.ly(); From 7ea73f5fc3092a54511f27970f67d5a79e767ada Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:28:13 +0800 Subject: [PATCH 027/100] Separate build and deploy workflows --- .github/workflows/build.yml | 61 +++++++++++++----------------------- .github/workflows/deploy.yml | 17 ++++++++++ 2 files changed, 39 insertions(+), 39 deletions(-) create mode 100644 .github/workflows/deploy.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9f9697f0..14ffca02 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,7 +1,7 @@ -name: Build and Upload Artifact +name: Build and Release (on GitHub only) on: - # either when a push is made to the main branch or when a pull request is merged + # when a push is made to the main branch (like when a pull request is merged, or something is pushed directly) workflow_dispatch: push: branches: [ "main", "pr/adisidev/201" ] @@ -20,13 +20,13 @@ jobs: with: fetch-depth: 0 - - name: Cache APT packages - uses: actions/cache@v3 - with: - path: /var/cache/apt - key: ${{ runner.os }}-apt-${{ hashFiles('apt-dependencies.txt') }} - restore-keys: | - ${{ runner.os }}-apt- + # - name: Cache APT packages + # uses: actions/cache@v3 + # with: + # 
path: /var/cache/apt + # key: ${{ runner.os }}-apt-${{ hashFiles('apt-dependencies.txt') }} + # restore-keys: | + # ${{ runner.os }}-apt- - name: Install Dependencies run: | @@ -35,11 +35,11 @@ jobs: sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y sudo apt update && sudo apt install -y gcc-11 g++-11 - - name: Save APT cache - uses: actions/cache@v3 - with: - path: /var/cache/apt - key: ${{ runner.os }}-apt-${{ hashFiles('apt-dependencies.txt') }} + # - name: Save APT cache + # uses: actions/cache@v3 + # with: + # path: /var/cache/apt + # key: ${{ runner.os }}-apt-${{ hashFiles('apt-dependencies.txt') }} - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} @@ -48,23 +48,18 @@ jobs: run: | cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --target install -j$(nproc) -- - # - name: Test + # - name: Run CTest # working-directory: ${{github.workspace}}/build # # Execute tests defined by the CMake configuration. # # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail # run: ctest -C ${{env.BUILD_TYPE}} - - name: Build Project - run: | - sudo cmake -B build - sudo make -j${CORES} -C build - - - name: Run Tests - run: | - sudo make install -C build - cd tests/ - chmod +x stress_test.sh - bash stress_test.sh + # - name: Run Stress Test + # run: | + # sudo make install -C build + # cd tests/ + # chmod +x stress_test.sh + # bash stress_test.sh - name: Upload Artifact uses: actions/upload-artifact@v4 @@ -105,16 +100,4 @@ jobs: files: build/bin/cartogram body: "Rolling release ${{ steps.generate_tag.outputs.release_number }}" env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - deploy: - needs: build-and-release - runs-on: ubuntu-latest - steps: - - name: Deploy binary to go-cart.io - uses: appleboy/ssh-action@v1.0.3 - with: - host: ${{ secrets.DEPLOY_HOST }} - username: ${{ secrets.DEPLOY_USER }} - key: ${{ secrets.DEPLOY_SSH_KEY }} - script: 
/home/cartogram/deploy-cartogram-cpp.sh + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 00000000..691d1e39 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,17 @@ +name: Depoy (to go-cart.io) + +on: + # only when triggered manually, after we have tested it ourselves + workflow_dispatch: + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Deploy binary to go-cart.io + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.DEPLOY_HOST }} + username: ${{ secrets.DEPLOY_USER }} + key: ${{ secrets.DEPLOY_SSH_KEY }} + script: /home/cartogram/deploy-cartogram-cpp.sh \ No newline at end of file From 2d1b166dcfd250214e69b35ecb869883ec066be7 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:33:49 +0800 Subject: [PATCH 028/100] Change new_tag to short_sha --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 14ffca02..e9fa75b9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -87,9 +87,9 @@ jobs: git tag -a "${short_sha}" -m "New release on ${current_date}" # Push the tag to the repository - git push origin "${new_tag}" + git push origin "${short_sha}" - echo "new_tag=${new_tag}" >> $GITHUB_OUTPUT + echo "new_tag=${short_sha}" >> $GITHUB_OUTPUT env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 5f2201ae6e3328164ed1db9a3cfd85e59d959dec Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 10 Oct 2024 16:56:19 +0800 Subject: [PATCH 029/100] Update csv.hpp from vincentlaucsb/csv-parser --- src/cartogram_info/csv.hpp | 4397 +++++++++++++++++++++--------------- 1 file changed, 2520 insertions(+), 1877 deletions(-) diff --git a/src/cartogram_info/csv.hpp b/src/cartogram_info/csv.hpp index 
b5415978..811c8e14 100644 --- a/src/cartogram_info/csv.hpp +++ b/src/cartogram_info/csv.hpp @@ -1,11 +1,11 @@ #pragma once /* -CSV for C++, version 2.0.0, beta +CSV for C++, version 2.3.0 https://github.com/vincentlaucsb/csv-parser MIT License -Copyright (c) 2017-2020 Vincent La +Copyright (c) 2017-2024 Vincent La Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -34,12 +34,14 @@ SOFTWARE. */ +#include #include +#include #include #include -#include #include -#include +#include +#include #include #include @@ -410,7 +412,7 @@ struct basic_mmap * handle (which is closed when the object destructs or `unmap` is called), which is * then used to memory map the requested region. Upon failure, `error` is set to * indicate the reason and the object remains in an unmapped state. - * + * * The entire file is mapped. */ template @@ -449,7 +451,7 @@ struct basic_mmap * `handle`, which must be a valid file handle, which is used to memory map the * requested region. Upon failure, `error` is set to indicate the reason and the * object remains in an unmapped state. - * + * * The entire file is mapped. */ void map(const handle_type handle, std::error_code& error) @@ -1795,101 +1797,54 @@ using shared_ummap_sink = basic_shared_mmap_sink; #endif // MIO_SHARED_MMAP_HEADER /** @file - * Defines CSV global constants + * @brief Contains the main CSV parsing algorithm and various utility functions */ -#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include -#if defined(_WIN32) -#include -#define WIN32_LEAN_AND_MEAN -#undef max -#undef min -#elif defined(__linux__) -#include -#endif - -namespace csv { - namespace internals { - // PAGE_SIZE macro could be already defined by the host system. 
- #if defined(PAGE_SIZE) - #undef PAGE_SIZE - #endif - - // Get operating system specific details - #if defined(_WIN32) - inline int getpagesize() { - _SYSTEM_INFO sys_info = {}; - GetSystemInfo(&sys_info); - return sys_info.dwPageSize; - } - - /** Size of a memory page in bytes */ - const int PAGE_SIZE = getpagesize(); - - /** Returns the amount of available mmory */ - inline unsigned long long get_available_memory() - { - MEMORYSTATUSEX status; - status.dwLength = sizeof(status); - GlobalMemoryStatusEx(&status); - return status.ullAvailPhys; - } - #elif defined(__linux__) - // To be defined - inline unsigned long long get_available_memory() { - return 0; - } - - const int PAGE_SIZE = getpagesize(); - #else - // To be defined - inline unsigned long long get_available_memory() { - return 0; - } - - const int PAGE_SIZE = 4096; - #endif - - /** For functions that lazy load a large CSV, this determines how - * many bytes are read at a time - */ - constexpr size_t ITERATION_CHUNK_SIZE = 250000000; // 250MB - - // TODO: Move to another header file - template - inline bool is_equal(T a, T b, T epsilon = 0.001) { - /** Returns true if two floating point values are about the same */ - static_assert(std::is_floating_point::value, "T must be a floating point type."); - return std::abs(a - b) < epsilon; - } - } - - /** Integer indicating a requested column wasn't found. 
*/ - constexpr int CSV_NOT_FOUND = -1; +#include +#include +#include +#include - /** Used for counting number of rows */ - using RowCount = long long int; -} /** @file - * @brief Implements data type parsing functionality + * A standalone header file containing shared code */ +#include +#include #include -#include -#include -#include +#include +#include -/** @file - * Defines various compatibility macros - */ +#if defined(_WIN32) +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include +# undef max +# undef min +#elif defined(__linux__) +# include +#endif -/** Helper macro which should be #defined as "inline" - * in the single header version - */ + /** Helper macro which should be #defined as "inline" + * in the single header version + */ #define CSV_INLINE inline +#include + // Copyright 2017-2019 by Martin Moene // // string-view lite, a C++17-like string_view for C++98 and later. @@ -3211,9 +3166,9 @@ nssv_RESTORE_WARNINGS() #endif // NONSTD_SV_LITE_H_INCLUDED -// If there is another version of Hedley, then the newer one -// takes precedence. -// See: https://github.com/nemequ/hedley + // If there is another version of Hedley, then the newer one + // takes precedence. + // See: https://github.com/nemequ/hedley /* Hedley - https://nemequ.github.io/hedley * Created by Evan Nemerson * @@ -4722,6 +4677,9 @@ HEDLEY_DIAGNOSTIC_POP namespace csv { +#ifdef _MSC_VER +#pragma region Compatibility Macros +#endif /** * @def IF_CONSTEXPR * Expands to `if constexpr` in C++17 and `if` otherwise @@ -4731,396 +4689,213 @@ namespace csv { * Mainly used for global variables. * * @def CONSTEXPR - * Expands to `constexpr` in C++17 and `inline` otherwise. + * Expands to `constexpr` in decent compilers and `inline` otherwise. * Intended for functions and methods. 
*/ - #if CMAKE_CXX_STANDARD == 17 || __cplusplus >= 201703L - #define CSV_HAS_CXX17 - #endif +#define STATIC_ASSERT(x) static_assert(x, "Assertion failed") - #ifdef CSV_HAS_CXX17 - #include - /** @typedef string_view - * The string_view class used by this library. - */ - using string_view = std::string_view; - #else - /** @typedef string_view - * The string_view class used by this library. - */ - using string_view = nonstd::string_view; - #endif +#if CMAKE_CXX_STANDARD == 17 || __cplusplus >= 201703L +#define CSV_HAS_CXX17 +#endif - #ifdef CSV_HAS_CXX17 - #define IF_CONSTEXPR if constexpr - #define CONSTEXPR_VALUE constexpr - #else - #define IF_CONSTEXPR if - #define CONSTEXPR_VALUE const - #endif +#if CMAKE_CXX_STANDARD >= 14 || __cplusplus >= 201402L +#define CSV_HAS_CXX14 +#endif + +#ifdef CSV_HAS_CXX17 +#include + /** @typedef string_view + * The string_view class used by this library. + */ + using string_view = std::string_view; +#else + /** @typedef string_view + * The string_view class used by this library. 
+ */ + using string_view = nonstd::string_view; +#endif + +#ifdef CSV_HAS_CXX17 + #define IF_CONSTEXPR if constexpr + #define CONSTEXPR_VALUE constexpr + + #define CONSTEXPR_17 constexpr +#else + #define IF_CONSTEXPR if + #define CONSTEXPR_VALUE const + + #define CONSTEXPR_17 inline +#endif + +#ifdef CSV_HAS_CXX14 + template + using enable_if_t = std::enable_if_t; + + #define CONSTEXPR_14 constexpr + #define CONSTEXPR_VALUE_14 constexpr +#else + template + using enable_if_t = typename std::enable_if::type; + + #define CONSTEXPR_14 inline + #define CONSTEXPR_VALUE_14 const +#endif // Resolves g++ bug with regard to constexpr methods - #if defined __GNUC__ && !defined __clang__ - #if __GNUC__ >= 7 - #if defined(CSV_HAS_CXX17) && (__GNUC_MINOR__ >= 2 || __GNUC__ >= 8) - #define CONSTEXPR constexpr - #endif - #endif + // See: https://stackoverflow.com/questions/36489369/constexpr-non-static-member-function-with-non-constexpr-constructor-gcc-clang-d +#if defined __GNUC__ && !defined __clang__ + #if (__GNUC__ >= 7 &&__GNUC_MINOR__ >= 2) || (__GNUC__ >= 8) + #define CONSTEXPR constexpr + #endif #else #ifdef CSV_HAS_CXX17 - #define CONSTEXPR constexpr - #endif - #endif - - #ifndef CONSTEXPR - #define CONSTEXPR inline + #define CONSTEXPR constexpr #endif -} - +#endif -namespace csv { - /** Enumerates the different CSV field types that are - * recognized by this library - * - * @note Overflowing integers will be stored and classified as doubles. - * @note Unlike previous releases, integer enums here are platform agnostic. 
- */ - enum class DataType { - UNKNOWN = -1, - CSV_NULL, /**< Empty string */ - CSV_STRING, /**< Non-numeric string */ - CSV_INT8, /**< 8-bit integer */ - CSV_INT16, /**< 16-bit integer (short on MSVC/GCC) */ - CSV_INT32, /**< 32-bit integer (int on MSVC/GCC) */ - CSV_INT64, /**< 64-bit integer (long long on MSVC/GCC) */ - CSV_DOUBLE /**< Floating point value */ - }; +#ifndef CONSTEXPR +#define CONSTEXPR inline +#endif - static_assert(DataType::CSV_STRING < DataType::CSV_INT8, "String type should come before numeric types."); - static_assert(DataType::CSV_INT8 < DataType::CSV_INT64, "Smaller integer types should come before larger integer types."); - static_assert(DataType::CSV_INT64 < DataType::CSV_DOUBLE, "Integer types should come before floating point value types."); +#ifdef _MSC_VER +#pragma endregion +#endif namespace internals { - /** Compute 10 to the power of n */ - template - HEDLEY_CONST CONSTEXPR - long double pow10(const T& n) noexcept { - long double multiplicand = n > 0 ? 10 : 0.1, - ret = 1; - - // Make all numbers positive - T iterations = n > 0 ? n : -n; - - for (T i = 0; i < iterations; i++) { - ret *= multiplicand; - } + // PAGE_SIZE macro could be already defined by the host system. +#if defined(PAGE_SIZE) +#undef PAGE_SIZE +#endif - return ret; +// Get operating system specific details +#if defined(_WIN32) + inline int getpagesize() { + _SYSTEM_INFO sys_info = {}; + GetSystemInfo(&sys_info); + return std::max(sys_info.dwPageSize, sys_info.dwAllocationGranularity); } - /** Compute 10 to the power of n */ - template<> - HEDLEY_CONST CONSTEXPR - long double pow10(const unsigned& n) noexcept { - long double multiplicand = n > 0 ? 10 : 0.1, - ret = 1; + const int PAGE_SIZE = getpagesize(); +#elif defined(__linux__) + const int PAGE_SIZE = getpagesize(); +#else + /** Size of a memory page in bytes. Used by + * csv::internals::CSVFieldArray when allocating blocks. 
+ */ + const int PAGE_SIZE = 4096; +#endif - for (unsigned i = 0; i < n; i++) { - ret *= multiplicand; - } + /** For functions that lazy load a large CSV, this determines how + * many bytes are read at a time + */ + constexpr size_t ITERATION_CHUNK_SIZE = 10000000; // 10MB - return ret; + template + inline bool is_equal(T a, T b, T epsilon = 0.001) { + /** Returns true if two floating point values are about the same */ + static_assert(std::is_floating_point::value, "T must be a floating point type."); + return std::abs(a - b) < epsilon; } -#ifndef DOXYGEN_SHOULD_SKIP_THIS - /** Private site-indexed array mapping byte sizes to an integer size enum */ - constexpr DataType int_type_arr[8] = { - DataType::CSV_INT8, // 1 - DataType::CSV_INT16, // 2 - DataType::UNKNOWN, - DataType::CSV_INT32, // 4 - DataType::UNKNOWN, - DataType::UNKNOWN, - DataType::UNKNOWN, - DataType::CSV_INT64 // 8 + /** @typedef ParseFlags + * An enum used for describing the significance of each character + * with respect to CSV parsing + * + * @see quote_escape_flag + */ + enum class ParseFlags { + QUOTE_ESCAPE_QUOTE = 0, /**< A quote inside or terminating a quote_escaped field */ + QUOTE = 2 | 1, /**< Characters which may signify a quote escape */ + NOT_SPECIAL = 4, /**< Characters with no special meaning or escaped delimiters and newlines */ + DELIMITER = 4 | 2, /**< Characters which signify a new field */ + NEWLINE = 4 | 2 | 1 /**< Characters which signify a new row */ }; - template - inline DataType type_num() { - static_assert(std::is_integral::value, "T should be an integral type."); - static_assert(sizeof(T) <= 8, "Byte size must be no greater than 8."); - return int_type_arr[sizeof(T) - 1]; + /** Transform the ParseFlags given the context of whether or not the current + * field is quote escaped */ + constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept { + return (ParseFlags)((int)flag & ~((int)ParseFlags::QUOTE * quote_escape)); } - template<> inline DataType 
type_num() { return DataType::CSV_DOUBLE; } - template<> inline DataType type_num() { return DataType::CSV_DOUBLE; } - template<> inline DataType type_num() { return DataType::CSV_DOUBLE; } - template<> inline DataType type_num() { return DataType::CSV_NULL; } - template<> inline DataType type_num() { return DataType::CSV_STRING; } - - CONSTEXPR DataType data_type(csv::string_view in, long double* const out = nullptr); -#endif + // Assumed to be true by parsing functions: allows for testing + // if an item is DELIMITER or NEWLINE with a >= statement + STATIC_ASSERT(ParseFlags::DELIMITER < ParseFlags::NEWLINE); - /** Given a byte size, return the largest number than can be stored in - * an integer of that size + /** Optimizations for reducing branching in parsing loop * - * Note: Provides a platform-agnostic way of mapping names like "long int" to - * byte sizes + * Idea: The meaning of all non-quote characters changes depending + * on whether or not the parser is in a quote-escaped mode (0 or 1) */ - template - CONSTEXPR long double get_int_max() { - static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8, - "Bytes must be a power of 2 below 8."); + STATIC_ASSERT(quote_escape_flag(ParseFlags::NOT_SPECIAL, false) == ParseFlags::NOT_SPECIAL); + STATIC_ASSERT(quote_escape_flag(ParseFlags::QUOTE, false) == ParseFlags::QUOTE); + STATIC_ASSERT(quote_escape_flag(ParseFlags::DELIMITER, false) == ParseFlags::DELIMITER); + STATIC_ASSERT(quote_escape_flag(ParseFlags::NEWLINE, false) == ParseFlags::NEWLINE); - IF_CONSTEXPR (sizeof(signed char) == Bytes) { - return (long double)std::numeric_limits::max(); - } + STATIC_ASSERT(quote_escape_flag(ParseFlags::NOT_SPECIAL, true) == ParseFlags::NOT_SPECIAL); + STATIC_ASSERT(quote_escape_flag(ParseFlags::QUOTE, true) == ParseFlags::QUOTE_ESCAPE_QUOTE); + STATIC_ASSERT(quote_escape_flag(ParseFlags::DELIMITER, true) == ParseFlags::NOT_SPECIAL); + STATIC_ASSERT(quote_escape_flag(ParseFlags::NEWLINE, true) == 
ParseFlags::NOT_SPECIAL); - IF_CONSTEXPR (sizeof(short) == Bytes) { - return (long double)std::numeric_limits::max(); - } + /** An array which maps ASCII chars to a parsing flag */ + using ParseFlagMap = std::array; - IF_CONSTEXPR (sizeof(int) == Bytes) { - return (long double)std::numeric_limits::max(); - } + /** An array which maps ASCII chars to a flag indicating if it is whitespace */ + using WhitespaceMap = std::array; + } - IF_CONSTEXPR (sizeof(long int) == Bytes) { - return (long double)std::numeric_limits::max(); - } + /** Integer indicating a requested column wasn't found. */ + constexpr int CSV_NOT_FOUND = -1; +} - IF_CONSTEXPR (sizeof(long long int) == Bytes) { - return (long double)std::numeric_limits::max(); - } - HEDLEY_UNREACHABLE(); - } +namespace csv { + namespace internals { + struct ColNames; + using ColNamesPtr = std::shared_ptr; - /** Given a byte size, return the largest number than can be stored in - * an unsigned integer of that size - */ - template - CONSTEXPR long double get_uint_max() { - static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8, - "Bytes must be a power of 2 below 8."); + /** @struct ColNames + * A data structure for handling column name information. + * + * These are created by CSVReader and passed (via smart pointer) + * to CSVRow objects it creates, thus + * allowing for indexing by column name. 
+ */ + struct ColNames { + public: + ColNames() = default; + ColNames(const std::vector& names) { + set_col_names(names); + } - IF_CONSTEXPR(sizeof(unsigned char) == Bytes) { - return (long double)std::numeric_limits::max(); - } - - IF_CONSTEXPR(sizeof(unsigned short) == Bytes) { - return (long double)std::numeric_limits::max(); - } - - IF_CONSTEXPR(sizeof(unsigned int) == Bytes) { - return (long double)std::numeric_limits::max(); - } - - IF_CONSTEXPR(sizeof(unsigned long int) == Bytes) { - return (long double)std::numeric_limits::max(); - } - - IF_CONSTEXPR(sizeof(unsigned long long int) == Bytes) { - return (long double)std::numeric_limits::max(); - } - - HEDLEY_UNREACHABLE(); - } - - /** Largest number that can be stored in a 8-bit integer */ - CONSTEXPR_VALUE long double CSV_INT8_MAX = get_int_max<1>(); - - /** Largest number that can be stored in a 16-bit integer */ - CONSTEXPR_VALUE long double CSV_INT16_MAX = get_int_max<2>(); - - /** Largest number that can be stored in a 32-bit integer */ - CONSTEXPR_VALUE long double CSV_INT32_MAX = get_int_max<4>(); - - /** Largest number that can be stored in a 64-bit integer */ - CONSTEXPR_VALUE long double CSV_INT64_MAX = get_int_max<8>(); - - /** Largest number that can be stored in a 8-bit ungisned integer */ - CONSTEXPR_VALUE long double CSV_UINT8_MAX = get_uint_max<1>(); - - /** Largest number that can be stored in a 16-bit unsigned integer */ - CONSTEXPR_VALUE long double CSV_UINT16_MAX = get_uint_max<2>(); - - /** Largest number that can be stored in a 32-bit unsigned integer */ - CONSTEXPR_VALUE long double CSV_UINT32_MAX = get_uint_max<4>(); - - /** Largest number that can be stored in a 64-bit unsigned integer */ - CONSTEXPR_VALUE long double CSV_UINT64_MAX = get_uint_max<8>(); - - /** Given a pointer to the start of what is start of - * the exponential part of a number written (possibly) in scientific notation - * parse the exponent - */ - HEDLEY_PRIVATE CONSTEXPR - DataType _process_potential_exponential( - 
csv::string_view exponential_part, - const long double& coeff, - long double * const out) { - long double exponent = 0; - auto result = data_type(exponential_part, &exponent); - - // Exponents in scientific notation should not be decimal numbers - if (result >= DataType::CSV_INT8 && result < DataType::CSV_DOUBLE) { - if (out) *out = coeff * pow10(exponent); - return DataType::CSV_DOUBLE; - } - - return DataType::CSV_STRING; - } - - /** Given the absolute value of an integer, determine what numeric type - * it fits in - */ - HEDLEY_PRIVATE HEDLEY_PURE CONSTEXPR - DataType _determine_integral_type(const long double& number) noexcept { - // We can assume number is always non-negative - assert(number >= 0); - - if (number <= internals::CSV_INT8_MAX) - return DataType::CSV_INT8; - else if (number <= internals::CSV_INT16_MAX) - return DataType::CSV_INT16; - else if (number <= internals::CSV_INT32_MAX) - return DataType::CSV_INT32; - else if (number <= internals::CSV_INT64_MAX) - return DataType::CSV_INT64; - else // Conversion to long long will cause an overflow - return DataType::CSV_DOUBLE; - } - - /** Distinguishes numeric from other text values. 
Used by various - * type casting functions, like csv_parser::CSVReader::read_row() - * - * #### Rules - * - Leading and trailing whitespace ("padding") ignored - * - A string of just whitespace is NULL - * - * @param[in] in String value to be examined - * @param[out] out Pointer to long double where results of numeric parsing - * get stored - */ - CONSTEXPR - DataType data_type(csv::string_view in, long double* const out) { - // Empty string --> NULL - if (in.size() == 0) - return DataType::CSV_NULL; - - bool ws_allowed = true, - neg_allowed = true, - dot_allowed = true, - digit_allowed = true, - has_digit = false, - prob_float = false; - - unsigned places_after_decimal = 0; - long double integral_part = 0, - decimal_part = 0; - - for (size_t i = 0, ilen = in.size(); i < ilen; i++) { - const char& current = in[i]; - - switch (current) { - case ' ': - if (!ws_allowed) { - if (isdigit(in[i - 1])) { - digit_allowed = false; - ws_allowed = true; - } - else { - // Ex: '510 123 4567' - return DataType::CSV_STRING; - } - } - break; - case '-': - if (!neg_allowed) { - // Ex: '510-123-4567' - return DataType::CSV_STRING; - } - - neg_allowed = false; - break; - case '.': - if (!dot_allowed) { - return DataType::CSV_STRING; - } - - dot_allowed = false; - prob_float = true; - break; - case 'e': - case 'E': - // Process scientific notation - if (prob_float || (i && i + 1 < ilen && isdigit(in[i - 1]))) { - size_t exponent_start_idx = i + 1; - prob_float = true; - - // Strip out plus sign - if (in[i + 1] == '+') { - exponent_start_idx++; - } - - return _process_potential_exponential( - in.substr(exponent_start_idx), - neg_allowed ? 
integral_part + decimal_part : -(integral_part + decimal_part), - out - ); - } - - return DataType::CSV_STRING; - break; - default: - short digit = current - '0'; - if (digit >= 0 && digit <= 9) { - // Process digit - has_digit = true; - - if (!digit_allowed) - return DataType::CSV_STRING; - else if (ws_allowed) // Ex: '510 456' - ws_allowed = false; - - // Build current number - if (prob_float) - decimal_part += digit / pow10(++places_after_decimal); - else - integral_part = (integral_part * 10) + digit; - } - else { - return DataType::CSV_STRING; - } - } - } - - // No non-numeric/non-whitespace characters found - if (has_digit) { - long double number = integral_part + decimal_part; - if (out) { - *out = neg_allowed ? number : -number; - } + std::vector get_col_names() const; + void set_col_names(const std::vector&); + int index_of(csv::string_view) const; - return prob_float ? DataType::CSV_DOUBLE : _determine_integral_type(number); - } + bool empty() const noexcept { return this->col_names.empty(); } + size_t size() const noexcept; - // Just whitespace - return DataType::CSV_NULL; - } + private: + std::vector col_names; + std::unordered_map col_pos; + }; } } /** @file * Defines an object used to store CSV format settings */ +#include #include #include #include namespace csv { + namespace internals { + class IBasicCSVParser; + } + class CSVReader; /** Determines how to handle rows that are shorter or longer than the majority */ @@ -5137,7 +4912,7 @@ namespace csv { }; /** Stores information about how to parse a CSV file. - * Can be used to construct a csv::CSVReader. + * Can be used to construct a csv::CSVReader. 
*/ class CSVFormat { public: @@ -5151,7 +4926,7 @@ namespace csv { CSVFormat& delimiter(char delim); /** Sets a list of potential delimiters - * + * * @throws `std::runtime_error` thrown if trim, quote, or possible delimiting characters overlap * @param[in] delim An array of possible delimiters to try parsing the CSV with */ @@ -5182,6 +4957,16 @@ namespace csv { */ CSVFormat& header_row(int row); + /** Tells the parser that this CSV has no header row + * + * @note Equivalent to `header_row(-1)` + * + */ + CSVFormat& no_header() { + this->header_row(-1); + return *this; + } + /** Turn quoting on or off */ CSVFormat& quote(bool use_quote) { this->no_quote = !use_quote; @@ -5189,23 +4974,17 @@ namespace csv { } /** Tells the parser how to handle columns of a different length than the others */ - CONSTEXPR CSVFormat& variable_columns(VariableColumnPolicy policy = VariableColumnPolicy::IGNORE_ROW) { + CONSTEXPR_14 CSVFormat& variable_columns(VariableColumnPolicy policy = VariableColumnPolicy::IGNORE_ROW) { this->variable_column_policy = policy; return *this; } /** Tells the parser how to handle columns of a different length than the others */ - CONSTEXPR CSVFormat& variable_columns(bool policy) { + CONSTEXPR_14 CSVFormat& variable_columns(bool policy) { this->variable_column_policy = (VariableColumnPolicy)policy; return *this; } - /** Tells the parser to detect and remove UTF-8 byte order marks */ - CONSTEXPR CSVFormat& detect_bom(bool detect = true) { - this->unicode_detect = detect; - return *this; - } - #ifndef DOXYGEN_SHOULD_SKIP_THIS char get_delim() const { // This error should never be received by end users. 
@@ -5223,14 +5002,13 @@ namespace csv { std::vector get_trim_chars() const { return this->trim_chars; } CONSTEXPR VariableColumnPolicy get_variable_column_policy() const { return this->variable_column_policy; } #endif - + /** CSVFormat for guessing the delimiter */ CSV_INLINE static CSVFormat guess_csv() { CSVFormat format; format.delimiter({ ',', '|', '\t', ';', '^' }) .quote('"') - .header_row(0) - .detect_bom(true); + .header_row(0); return format; } @@ -5240,7 +5018,8 @@ namespace csv { } friend CSVReader; - + friend internals::IBasicCSVParser; + private: /**< Throws an error if delimiters and trim characters overlap */ void assert_no_char_overlap(); @@ -5265,1082 +5044,2166 @@ namespace csv { /**< Allow variable length columns? */ VariableColumnPolicy variable_column_policy = VariableColumnPolicy::IGNORE_ROW; - - /**< Detect and strip out Unicode byte order marks */ - bool unicode_detect = true; }; } -#include -#include -#include -#include -#include -#include +/** @file + * Defines the data type used for storing information about a CSV row + */ -#include +#include #include -#include +#include +#include // For CSVField +#include // For CSVField #include #include +#include +#include #include -#include -#include +/** @file + * @brief Implements data type parsing functionality + */ + +#include +#include #include -#include +#include namespace csv { - namespace internals { - struct ColNames; - using ColNamesPtr = std::shared_ptr; + /** Enumerates the different CSV field types that are + * recognized by this library + * + * @note Overflowing integers will be stored and classified as doubles. + * @note Unlike previous releases, integer enums here are platform agnostic. 
+ */ + enum class DataType { + UNKNOWN = -1, + CSV_NULL, /**< Empty string */ + CSV_STRING, /**< Non-numeric string */ + CSV_INT8, /**< 8-bit integer */ + CSV_INT16, /**< 16-bit integer (short on MSVC/GCC) */ + CSV_INT32, /**< 32-bit integer (int on MSVC/GCC) */ + CSV_INT64, /**< 64-bit integer (long long on MSVC/GCC) */ + CSV_BIGINT, /**< Value too big to fit in a 64-bit in */ + CSV_DOUBLE /**< Floating point value */ + }; - /** @struct ColNames - * A data structure for handling column name information. - * - * These are created by CSVReader and passed (via smart pointer) - * to CSVRow objects it creates, thus - * allowing for indexing by column name. + static_assert(DataType::CSV_STRING < DataType::CSV_INT8, "String type should come before numeric types."); + static_assert(DataType::CSV_INT8 < DataType::CSV_INT64, "Smaller integer types should come before larger integer types."); + static_assert(DataType::CSV_INT64 < DataType::CSV_DOUBLE, "Integer types should come before floating point value types."); + + namespace internals { + /** Compute 10 to the power of n */ + template + HEDLEY_CONST CONSTEXPR_14 + long double pow10(const T& n) noexcept { + long double multiplicand = n > 0 ? 10 : 0.1, + ret = 1; + + // Make all numbers positive + T iterations = n > 0 ? n : -n; + + for (T i = 0; i < iterations; i++) { + ret *= multiplicand; + } + + return ret; + } + + /** Compute 10 to the power of n */ + template<> + HEDLEY_CONST CONSTEXPR_14 + long double pow10(const unsigned& n) noexcept { + long double multiplicand = n > 0 ? 
10 : 0.1, + ret = 1; + + for (unsigned i = 0; i < n; i++) { + ret *= multiplicand; + } + + return ret; + } + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + /** Private site-indexed array mapping byte sizes to an integer size enum */ + constexpr DataType int_type_arr[8] = { + DataType::CSV_INT8, // 1 + DataType::CSV_INT16, // 2 + DataType::UNKNOWN, + DataType::CSV_INT32, // 4 + DataType::UNKNOWN, + DataType::UNKNOWN, + DataType::UNKNOWN, + DataType::CSV_INT64 // 8 + }; + + template + inline DataType type_num() { + static_assert(std::is_integral::value, "T should be an integral type."); + static_assert(sizeof(T) <= 8, "Byte size must be no greater than 8."); + return int_type_arr[sizeof(T) - 1]; + } + + template<> inline DataType type_num() { return DataType::CSV_DOUBLE; } + template<> inline DataType type_num() { return DataType::CSV_DOUBLE; } + template<> inline DataType type_num() { return DataType::CSV_DOUBLE; } + template<> inline DataType type_num() { return DataType::CSV_NULL; } + template<> inline DataType type_num() { return DataType::CSV_STRING; } + + CONSTEXPR_14 DataType data_type(csv::string_view in, long double* const out = nullptr, + const char decimalsymbol = '.'); +#endif + + /** Given a byte size, return the largest number than can be stored in + * an integer of that size + * + * Note: Provides a platform-agnostic way of mapping names like "long int" to + * byte sizes + */ + template + CONSTEXPR_14 long double get_int_max() { + static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8, + "Bytes must be a power of 2 below 8."); + + IF_CONSTEXPR (sizeof(signed char) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + IF_CONSTEXPR (sizeof(short) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + IF_CONSTEXPR (sizeof(int) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + IF_CONSTEXPR (sizeof(long int) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + IF_CONSTEXPR (sizeof(long 
long int) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + HEDLEY_UNREACHABLE(); + } + + /** Given a byte size, return the largest number than can be stored in + * an unsigned integer of that size + */ + template + CONSTEXPR_14 long double get_uint_max() { + static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8, + "Bytes must be a power of 2 below 8."); + + IF_CONSTEXPR(sizeof(unsigned char) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + IF_CONSTEXPR(sizeof(unsigned short) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + IF_CONSTEXPR(sizeof(unsigned int) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + IF_CONSTEXPR(sizeof(unsigned long int) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + IF_CONSTEXPR(sizeof(unsigned long long int) == Bytes) { + return (long double)std::numeric_limits::max(); + } + + HEDLEY_UNREACHABLE(); + } + + /** Largest number that can be stored in a 8-bit integer */ + CONSTEXPR_VALUE_14 long double CSV_INT8_MAX = get_int_max<1>(); + + /** Largest number that can be stored in a 16-bit integer */ + CONSTEXPR_VALUE_14 long double CSV_INT16_MAX = get_int_max<2>(); + + /** Largest number that can be stored in a 32-bit integer */ + CONSTEXPR_VALUE_14 long double CSV_INT32_MAX = get_int_max<4>(); + + /** Largest number that can be stored in a 64-bit integer */ + CONSTEXPR_VALUE_14 long double CSV_INT64_MAX = get_int_max<8>(); + + /** Largest number that can be stored in a 8-bit ungisned integer */ + CONSTEXPR_VALUE_14 long double CSV_UINT8_MAX = get_uint_max<1>(); + + /** Largest number that can be stored in a 16-bit unsigned integer */ + CONSTEXPR_VALUE_14 long double CSV_UINT16_MAX = get_uint_max<2>(); + + /** Largest number that can be stored in a 32-bit unsigned integer */ + CONSTEXPR_VALUE_14 long double CSV_UINT32_MAX = get_uint_max<4>(); + + /** Largest number that can be stored in a 64-bit unsigned integer */ + 
CONSTEXPR_VALUE_14 long double CSV_UINT64_MAX = get_uint_max<8>(); + + /** Given a pointer to the start of what is start of + * the exponential part of a number written (possibly) in scientific notation + * parse the exponent + */ + HEDLEY_PRIVATE CONSTEXPR_14 + DataType _process_potential_exponential( + csv::string_view exponential_part, + const long double& coeff, + long double * const out) { + long double exponent = 0; + auto result = data_type(exponential_part, &exponent); + + // Exponents in scientific notation should not be decimal numbers + if (result >= DataType::CSV_INT8 && result < DataType::CSV_DOUBLE) { + if (out) *out = coeff * pow10(exponent); + return DataType::CSV_DOUBLE; + } + + return DataType::CSV_STRING; + } + + /** Given the absolute value of an integer, determine what numeric type + * it fits in + */ + HEDLEY_PRIVATE HEDLEY_PURE CONSTEXPR_14 + DataType _determine_integral_type(const long double& number) noexcept { + // We can assume number is always non-negative + assert(number >= 0); + + if (number <= internals::CSV_INT8_MAX) + return DataType::CSV_INT8; + else if (number <= internals::CSV_INT16_MAX) + return DataType::CSV_INT16; + else if (number <= internals::CSV_INT32_MAX) + return DataType::CSV_INT32; + else if (number <= internals::CSV_INT64_MAX) + return DataType::CSV_INT64; + else // Conversion to long long will cause an overflow + return DataType::CSV_BIGINT; + } + + /** Distinguishes numeric from other text values. Used by various + * type casting functions, like csv_parser::CSVReader::read_row() + * + * #### Rules + * - Leading and trailing whitespace ("padding") ignored + * - A string of just whitespace is NULL + * + * @param[in] in String value to be examined + * @param[out] out Pointer to long double where results of numeric parsing + * get stored + * @param[in] decimalSymbol the character separating integral and decimal part, + * defaults to '.' 
if omitted + */ + CONSTEXPR_14 + DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) { + // Empty string --> NULL + if (in.size() == 0) + return DataType::CSV_NULL; + + bool ws_allowed = true, + dot_allowed = true, + digit_allowed = true, + is_negative = false, + has_digit = false, + prob_float = false; + + unsigned places_after_decimal = 0; + long double integral_part = 0, + decimal_part = 0; + + for (size_t i = 0, ilen = in.size(); i < ilen; i++) { + const char& current = in[i]; + + switch (current) { + case ' ': + if (!ws_allowed) { + if (isdigit(in[i - 1])) { + digit_allowed = false; + ws_allowed = true; + } + else { + // Ex: '510 123 4567' + return DataType::CSV_STRING; + } + } + break; + case '+': + if (!ws_allowed) { + return DataType::CSV_STRING; + } + + break; + case '-': + if (!ws_allowed) { + // Ex: '510-123-4567' + return DataType::CSV_STRING; + } + + is_negative = true; + break; + // case decimalSymbol: not allowed because decimalSymbol is not a literal, + // it is handled in the default block + case 'e': + case 'E': + // Process scientific notation + if (prob_float || (i && i + 1 < ilen && isdigit(in[i - 1]))) { + size_t exponent_start_idx = i + 1; + prob_float = true; + + // Strip out plus sign + if (in[i + 1] == '+') { + exponent_start_idx++; + } + + return _process_potential_exponential( + in.substr(exponent_start_idx), + is_negative ? 
-(integral_part + decimal_part) : integral_part + decimal_part, + out + ); + } + + return DataType::CSV_STRING; + break; + default: + short digit = static_cast(current - '0'); + if (digit >= 0 && digit <= 9) { + // Process digit + has_digit = true; + + if (!digit_allowed) + return DataType::CSV_STRING; + else if (ws_allowed) // Ex: '510 456' + ws_allowed = false; + + // Build current number + if (prob_float) + decimal_part += digit / pow10(++places_after_decimal); + else + integral_part = (integral_part * 10) + digit; + } + // case decimalSymbol: not allowed because decimalSymbol is not a literal. + else if (dot_allowed && current == decimalSymbol) { + dot_allowed = false; + prob_float = true; + } + else { + return DataType::CSV_STRING; + } + } + } + + // No non-numeric/non-whitespace characters found + if (has_digit) { + long double number = integral_part + decimal_part; + if (out) { + *out = is_negative ? -number : number; + } + + return prob_float ? DataType::CSV_DOUBLE : _determine_integral_type(number); + } + + // Just whitespace + return DataType::CSV_NULL; + } + } +} + +namespace csv { + namespace internals { + class IBasicCSVParser; + + static const std::string ERROR_NAN = "Not a number."; + static const std::string ERROR_OVERFLOW = "Overflow error."; + static const std::string ERROR_FLOAT_TO_INT = + "Attempted to convert a floating point value to an integral type."; + static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types."; + + std::string json_escape_string(csv::string_view s) noexcept; + + /** A barebones class used for describing CSV fields */ + struct RawCSVField { + RawCSVField() = default; + RawCSVField(size_t _start, size_t _length, bool _double_quote = false) { + start = _start; + length = _length; + has_double_quote = _double_quote; + } + + /** The start of the field, relative to the beginning of the row */ + size_t start; + + /** The length of the row, ignoring quote escape characters */ + size_t 
length; + + /** Whether or not the field contains an escaped quote */ + bool has_double_quote; + }; + + /** A class used for efficiently storing RawCSVField objects and expanding as necessary + * + * @par Implementation + * This data structure stores RawCSVField in continguous blocks. When more capacity + * is needed, a new block is allocated, but previous data stays put. + * + * @par Thread Safety + * This class may be safely read from multiple threads and written to from one, + * as long as the writing thread does not actively touch fields which are being + * read. + */ + class CSVFieldList { + public: + /** Construct a CSVFieldList which allocates blocks of a certain size */ + CSVFieldList(size_t single_buffer_capacity = (size_t)(internals::PAGE_SIZE / sizeof(RawCSVField))) : + _single_buffer_capacity(single_buffer_capacity) { + this->allocate(); + } + + // No copy constructor + CSVFieldList(const CSVFieldList& other) = delete; + + // CSVFieldArrays may be moved + CSVFieldList(CSVFieldList&& other) : + _single_buffer_capacity(other._single_buffer_capacity) { + + for (auto&& buffer : other.buffers) { + this->buffers.emplace_back(std::move(buffer)); + } + + _current_buffer_size = other._current_buffer_size; + _back = other._back; + } + + template + void emplace_back(Args&&... 
args) { + if (this->_current_buffer_size == this->_single_buffer_capacity) { + this->allocate(); + } + + *(_back++) = RawCSVField(std::forward(args)...); + _current_buffer_size++; + } + + size_t size() const noexcept { + return this->_current_buffer_size + ((this->buffers.size() - 1) * this->_single_buffer_capacity); + } + + RawCSVField& operator[](size_t n) const; + + private: + const size_t _single_buffer_capacity; + + /** + * Prefer std::deque over std::vector because it does not + * reallocate upon expansion, allowing pointers to its members + * to remain valid & avoiding potential race conditions when + * CSVFieldList is accesssed simulatenously by a reading thread and + * a writing thread + */ + std::deque> buffers = {}; + + /** Number of items in the current buffer */ + size_t _current_buffer_size = 0; + + /** Pointer to the current empty field */ + RawCSVField* _back = nullptr; + + /** Allocate a new page of memory */ + void allocate(); + }; + + /** A class for storing raw CSV data and associated metadata */ + struct RawCSVData { + std::shared_ptr _data = nullptr; + csv::string_view data = ""; + + internals::CSVFieldList fields; + + std::unordered_set has_double_quotes = {}; + + // TODO: Consider replacing with a more thread-safe structure + std::unordered_map double_quote_fields = {}; + + internals::ColNamesPtr col_names = nullptr; + internals::ParseFlagMap parse_flags; + internals::WhitespaceMap ws_flags; + }; + + using RawCSVDataPtr = std::shared_ptr; + } + + /** + * @class CSVField + * @brief Data type representing individual CSV values. + * CSVFields can be obtained by using CSVRow::operator[] + */ + class CSVField { + public: + /** Constructs a CSVField from a string_view */ + constexpr explicit CSVField(csv::string_view _sv) noexcept : sv(_sv) { }; + + operator std::string() const { + return std::string(" ") + std::string(this->sv); + } + + /** Returns the value casted to the requested type, performing type checking before. 
+ * + * \par Valid options for T + * - std::string or csv::string_view + * - signed integral types (signed char, short, int, long int, long long int) + * - floating point types (float, double, long double) + * - unsigned integers are not supported at this time, but may be in a later release + * + * \par Invalid conversions + * - Converting non-numeric values to any numeric type + * - Converting floating point values to integers + * - Converting a large integer to a smaller type that will not hold it + * + * @note This method is capable of parsing scientific E-notation. + * See [this page](md_docs_source_scientific_notation.html) + * for more details. + * + * @throws std::runtime_error Thrown if an invalid conversion is performed. + * + * @warning Currently, conversions to floating point types are not + * checked for loss of precision + * + * @warning Any string_views returned are only guaranteed to be valid + * if the parent CSVRow is still alive. If you are concerned + * about object lifetimes, then grab a std::string or a + * numeric value. 
+ * + */ + template T get() { + IF_CONSTEXPR(std::is_arithmetic::value) { + // Note: this->type() also converts the CSV value to float + if (this->type() <= DataType::CSV_STRING) { + throw std::runtime_error(internals::ERROR_NAN); + } + } + + IF_CONSTEXPR(std::is_integral::value) { + // Note: this->is_float() also converts the CSV value to float + if (this->is_float()) { + throw std::runtime_error(internals::ERROR_FLOAT_TO_INT); + } + + IF_CONSTEXPR(std::is_unsigned::value) { + if (this->value < 0) { + throw std::runtime_error(internals::ERROR_NEG_TO_UNSIGNED); + } + } + } + + // Allow fallthrough from previous if branch + IF_CONSTEXPR(!std::is_floating_point::value) { + IF_CONSTEXPR(std::is_unsigned::value) { + // Quick hack to perform correct unsigned integer boundary checks + if (this->value > internals::get_uint_max()) { + throw std::runtime_error(internals::ERROR_OVERFLOW); + } + } + else if (internals::type_num() < this->_type) { + throw std::runtime_error(internals::ERROR_OVERFLOW); + } + } + + return static_cast(this->value); + } + + /** Parse a hexadecimal value, returning false if the value is not hex. */ + bool try_parse_hex(int& parsedValue); + + /** Attempts to parse a decimal (or integer) value using the given symbol, + * returning `true` if the value is numeric. + * + * @note This method also updates this field's type + * + */ + bool try_parse_decimal(long double& dVal, const char decimalSymbol = '.'); + + /** Compares the contents of this field to a numeric value. If this + * field does not contain a numeric value, then all comparisons return + * false. + * + * @note Floating point values are considered equal if they are within + * `0.000001` of each other. + * + * @warning Multiple numeric comparisons involving the same field can + * be done more efficiently by calling the CSVField::get<>() method. 
+ * + * @sa csv::CSVField::operator==(const char * other) + * @sa csv::CSVField::operator==(csv::string_view other) + */ + template + CONSTEXPR_14 bool operator==(T other) const noexcept + { + static_assert(std::is_arithmetic::value, + "T should be a numeric value."); + + if (this->_type != DataType::UNKNOWN) { + if (this->_type == DataType::CSV_STRING) { + return false; + } + + return internals::is_equal(value, static_cast(other), 0.000001L); + } + + long double out = 0; + if (internals::data_type(this->sv, &out) == DataType::CSV_STRING) { + return false; + } + + return internals::is_equal(out, static_cast(other), 0.000001L); + } + + /** Return a string view over the field's contents */ + CONSTEXPR csv::string_view get_sv() const noexcept { return this->sv; } + + /** Returns true if field is an empty string or string of whitespace characters */ + CONSTEXPR_14 bool is_null() noexcept { return type() == DataType::CSV_NULL; } + + /** Returns true if field is a non-numeric, non-empty string */ + CONSTEXPR_14 bool is_str() noexcept { return type() == DataType::CSV_STRING; } + + /** Returns true if field is an integer or float */ + CONSTEXPR_14 bool is_num() noexcept { return type() >= DataType::CSV_INT8; } + + /** Returns true if field is an integer */ + CONSTEXPR_14 bool is_int() noexcept { + return (type() >= DataType::CSV_INT8) && (type() <= DataType::CSV_INT64); + } + + /** Returns true if field is a floating point value */ + CONSTEXPR_14 bool is_float() noexcept { return type() == DataType::CSV_DOUBLE; }; + + /** Return the type of the underlying CSV data */ + CONSTEXPR_14 DataType type() noexcept { + this->get_value(); + return _type; + } + + private: + long double value = 0; /**< Cached numeric value */ + csv::string_view sv = ""; /**< A pointer to this field's text */ + DataType _type = DataType::UNKNOWN; /**< Cached data type value */ + CONSTEXPR_14 void get_value() noexcept { + /* Check to see if value has been cached previously, if not + * evaluate it */ - 
struct ColNames { - public: - ColNames() = default; - ColNames(const std::vector& names) { - set_col_names(names); + if ((int)_type < 0) { + this->_type = internals::data_type(this->sv, &this->value); } + } + }; + + /** Data structure for representing CSV rows */ + class CSVRow { + public: + friend internals::IBasicCSVParser; + + CSVRow() = default; + + /** Construct a CSVRow from a RawCSVDataPtr */ + CSVRow(internals::RawCSVDataPtr _data) : data(_data) {} + CSVRow(internals::RawCSVDataPtr _data, size_t _data_start, size_t _field_bounds) + : data(_data), data_start(_data_start), fields_start(_field_bounds) {} + + /** Indicates whether row is empty or not */ + CONSTEXPR bool empty() const noexcept { return this->size() == 0; } + + /** Return the number of fields in this row */ + CONSTEXPR size_t size() const noexcept { return row_length; } + + /** @name Value Retrieval */ + ///@{ + CSVField operator[](size_t n) const; + CSVField operator[](const std::string&) const; + std::string to_json(const std::vector& subset = {}) const; + std::string to_json_array(const std::vector& subset = {}) const; + + /** Retrieve this row's associated column names */ + std::vector get_col_names() const { + return this->data->col_names->get_col_names(); + } + + /** Convert this CSVRow into a vector of strings. + * **Note**: This is a less efficient method of + * accessing data than using the [] operator. + */ + operator std::vector() const; + ///@} + + /** A random access iterator over the contents of a CSV row. + * Each iterator points to a CSVField. 
+ */ + class iterator { + public: +#ifndef DOXYGEN_SHOULD_SKIP_THIS + using value_type = CSVField; + using difference_type = int; + using pointer = std::shared_ptr; + using reference = CSVField & ; + using iterator_category = std::random_access_iterator_tag; +#endif + iterator(const CSVRow*, int i); + + reference operator*() const; + pointer operator->() const; + + iterator operator++(int); + iterator& operator++(); + iterator operator--(int); + iterator& operator--(); + iterator operator+(difference_type n) const; + iterator operator-(difference_type n) const; + + /** Two iterators are equal if they point to the same field */ + CONSTEXPR bool operator==(const iterator& other) const noexcept { + return this->i == other.i; + }; + + CONSTEXPR bool operator!=(const iterator& other) const noexcept { return !operator==(other); } + +#ifndef NDEBUG + friend CSVRow; +#endif + + private: + const CSVRow * daddy = nullptr; // Pointer to parent + std::shared_ptr field = nullptr; // Current field pointed at + int i = 0; // Index of current field + }; + + /** A reverse iterator over the contents of a CSVRow. */ + using reverse_iterator = std::reverse_iterator; + + /** @name Iterators + * @brief Each iterator points to a CSVField object. 
+ */ + ///@{ + iterator begin() const; + iterator end() const noexcept; + reverse_iterator rbegin() const noexcept; + reverse_iterator rend() const; + ///@} + + private: + /** Retrieve a string view corresponding to the specified index */ + csv::string_view get_field(size_t index) const; + + internals::RawCSVDataPtr data; + + /** Where in RawCSVData.data we start */ + size_t data_start = 0; + + /** Where in the RawCSVDataPtr.fields array we start */ + size_t fields_start = 0; + + /** How many columns this row spans */ + size_t row_length = 0; + }; + +#ifdef _MSC_VER +#pragma region CSVField::get Specializations +#endif + /** Retrieve this field's original string */ + template<> + inline std::string CSVField::get() { + return std::string(this->sv); + } + + /** Retrieve a view over this field's string + * + * @warning This string_view is only guaranteed to be valid as long as this + * CSVRow is still alive. + */ + template<> + CONSTEXPR_14 csv::string_view CSVField::get() { + return this->sv; + } - std::vector get_col_names() const; - void set_col_names(const std::vector&); - int index_of(csv::string_view) const; + /** Retrieve this field's value as a long double */ + template<> + CONSTEXPR_14 long double CSVField::get() { + if (!is_num()) + throw std::runtime_error(internals::ERROR_NAN); - bool empty() const { return this->col_names.empty(); } - size_t size() const; + return this->value; + } +#ifdef _MSC_VER +#pragma endregion CSVField::get Specializations +#endif - private: - std::vector col_names; - std::unordered_map col_pos; - }; + /** Compares the contents of this field to a string */ + template<> + CONSTEXPR bool CSVField::operator==(const char * other) const noexcept + { + return this->sv == other; + } + + /** Compares the contents of this field to a string */ + template<> + CONSTEXPR bool CSVField::operator==(csv::string_view other) const noexcept + { + return this->sv == other; } } -/** @file - * Defines the data type used for storing information about a 
CSV row - */ -#include -#include -#include -#include -#include // For ColNames -#include -#include // For CSVField -#include // For CSVField -#include +inline std::ostream& operator << (std::ostream& os, csv::CSVField const& value) { + os << std::string(value); + return os; +} namespace csv { - class BasicCSVParser; + namespace internals { + /** Create a vector v where each index i corresponds to the + * ASCII number for a character and, v[i + 128] labels it according to + * the CSVReader::ParseFlags enum + */ + HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter) { + std::array ret = {}; + for (int i = -128; i < 128; i++) { + const int arr_idx = i + 128; + char ch = char(i); - struct RawCSVField { - size_t start; - size_t length; - }; + if (ch == delimiter) + ret[arr_idx] = ParseFlags::DELIMITER; + else if (ch == '\r' || ch == '\n') + ret[arr_idx] = ParseFlags::NEWLINE; + else + ret[arr_idx] = ParseFlags::NOT_SPECIAL; + } - namespace internals { - static const std::string ERROR_NAN = "Not a number."; - static const std::string ERROR_OVERFLOW = "Overflow error."; - static const std::string ERROR_FLOAT_TO_INT = - "Attempted to convert a floating point value to an integral type."; - static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types."; + return ret; + } - std::string json_escape_string(csv::string_view s) noexcept; + /** Create a vector v where each index i corresponds to the + * ASCII number for a character and, v[i + 128] labels it according to + * the CSVReader::ParseFlags enum + */ + HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter, char quote_char) { + std::array ret = make_parse_flags(delimiter); + ret[(size_t)quote_char + 128] = ParseFlags::QUOTE; + return ret; + } + + /** Create a vector v where each index i corresponds to the + * ASCII number for a character c and, v[i + 128] is true if + * c is a whitespace character + */ + HEDLEY_CONST CONSTEXPR_17 WhitespaceMap 
make_ws_flags(const char* ws_chars, size_t n_chars) { + std::array ret = {}; + for (int i = -128; i < 128; i++) { + const int arr_idx = i + 128; + char ch = char(i); + ret[arr_idx] = false; + + for (size_t j = 0; j < n_chars; j++) { + if (ws_chars[j] == ch) { + ret[arr_idx] = true; + } + } + } + + return ret; + } - /** A class used for efficiently storing RawCSVField objects and expanding as necessary */ - class CSVFieldArray { + inline WhitespaceMap make_ws_flags(const std::vector& flags) { + return make_ws_flags(flags.data(), flags.size()); + } + + CSV_INLINE size_t get_file_size(csv::string_view filename); + + CSV_INLINE std::string get_csv_head(csv::string_view filename); + + /** Read the first 500KB of a CSV file */ + CSV_INLINE std::string get_csv_head(csv::string_view filename, size_t file_size); + + /** A std::deque wrapper which allows multiple read and write threads to concurrently + * access it along with providing read threads the ability to wait for the deque + * to become populated + */ + template + class ThreadSafeDeque { public: - CSVFieldArray() { - this->allocate(); + ThreadSafeDeque(size_t notify_size = 100) : _notify_size(notify_size) {}; + ThreadSafeDeque(const ThreadSafeDeque& other) { + this->data = other.data; + this->_notify_size = other._notify_size; } - RawCSVField& operator[](size_t n) { - if (n > this->size()) { - throw std::runtime_error("Index out of bounds."); - } + ThreadSafeDeque(const std::deque& source) : ThreadSafeDeque() { + this->data = source; + } - size_t page_no = (size_t)std::floor((double)(n / this->single_buffer_capacity)); - size_t buffer_idx = (page_no < 1) ? 
n : n % this->single_buffer_capacity; - return this->buffers[page_no][buffer_idx]; + void clear() noexcept { this->data.clear(); } + + bool empty() const noexcept { + return this->data.empty(); } - void push_back(RawCSVField&& field) { - if (this->_current_buffer_size == this->single_buffer_capacity) { - this->allocate(); + T& front() noexcept { + return this->data.front(); + } + + T& operator[](size_t n) { + return this->data[n]; + } + + void push_back(T&& item) { + std::lock_guard lock{ this->_lock }; + this->data.push_back(std::move(item)); + + if (this->size() >= _notify_size) { + this->_cond.notify_all(); } + } - this->buffers.back()[this->_current_buffer_size] = std::move(field); - this->_current_buffer_size++; - this->_size++; + T pop_front() noexcept { + std::lock_guard lock{ this->_lock }; + T item = std::move(data.front()); + data.pop_front(); + return item; } - ~CSVFieldArray() { - for (auto& buffer : buffers) { - delete[] buffer; + size_t size() const noexcept { return this->data.size(); } + + /** Returns true if a thread is actively pushing items to this deque */ + constexpr bool is_waitable() const noexcept { return this->_is_waitable; } + + /** Wait for an item to become available */ + void wait() { + if (!is_waitable()) { + return; } + + std::unique_lock lock{ this->_lock }; + this->_cond.wait(lock, [this] { return this->size() >= _notify_size || !this->is_waitable(); }); + lock.unlock(); } - CONSTEXPR size_t size() const noexcept { - return this->_size; + typename std::deque::iterator begin() noexcept { + return this->data.begin(); } - private: - const size_t single_buffer_capacity = (size_t)(internals::PAGE_SIZE / alignof(RawCSVField)); + typename std::deque::iterator end() noexcept { + return this->data.end(); + } - std::vector buffers = {}; - size_t _current_buffer_size = 0; - size_t _size = 0; + /** Tell listeners that this deque is actively being pushed to */ + void notify_all() { + std::unique_lock lock{ this->_lock }; + this->_is_waitable = 
true; + this->_cond.notify_all(); + } - /** Allocate a new page of memory */ - void allocate(); + /** Tell all listeners to stop */ + void kill_all() { + std::unique_lock lock{ this->_lock }; + this->_is_waitable = false; + this->_cond.notify_all(); + } + + private: + bool _is_waitable = false; + size_t _notify_size; + std::mutex _lock; + std::condition_variable _cond; + std::deque data; }; + + constexpr const int UNINITIALIZED_FIELD = -1; } - /** A class for storing raw CSV data and associated metadata */ - struct RawCSVData { - std::string data = ""; - internals::CSVFieldArray fields; + /** Standard type for storing collection of rows */ + using RowCollection = internals::ThreadSafeDeque; - std::unordered_set has_double_quotes = {}; - std::unordered_map double_quote_fields = {}; - internals::ColNamesPtr col_names = nullptr; - }; + namespace internals { + /** Abstract base class which provides CSV parsing logic. + * + * Concrete implementations may customize this logic across + * different input sources, such as memory mapped files, stringstreams, + * etc... + */ + class IBasicCSVParser { + public: + IBasicCSVParser() = default; + IBasicCSVParser(const CSVFormat&, const ColNamesPtr&); + IBasicCSVParser(const ParseFlagMap& parse_flags, const WhitespaceMap& ws_flags + ) : _parse_flags(parse_flags), _ws_flags(ws_flags) {}; - using RawCSVDataPtr = std::shared_ptr; + virtual ~IBasicCSVParser() {} - /** - * @class CSVField - * @brief Data type representing individual CSV values. 
- * CSVFields can be obtained by using CSVRow::operator[] - */ - class CSVField { - public: - /** Constructs a CSVField from a string_view */ - constexpr explicit CSVField(csv::string_view _sv) : sv(_sv) { }; + /** Whether or not we have reached the end of source */ + bool eof() { return this->_eof; } - operator std::string() const { - return std::string(" ") + std::string(this->sv); - } + /** Parse the next block of data */ + virtual void next(size_t bytes) = 0; - /** Returns the value casted to the requested type, performing type checking before. - * - * \par Valid options for T - * - std::string or csv::string_view - * - signed integral types (signed char, short, int, long int, long long int) - * - floating point types (float, double, long double) - * - unsigned integers are not supported at this time, but may be in a later release - * - * \par Invalid conversions - * - Converting non-numeric values to any numeric type - * - Converting floating point values to integers - * - Converting a large integer to a smaller type that will not hold it - * - * @note This method is capable of parsing scientific E-notation. - * See [this page](md_docs_source_scientific_notation.html) - * for more details. - * - * @throws std::runtime_error Thrown if an invalid conversion is performed. - * - * @warning Currently, conversions to floating point types are not - * checked for loss of precision - * - * @warning Any string_views returned are only guaranteed to be valid - * if the parent CSVRow is still alive. If you are concerned - * about object lifetimes, then grab a std::string or a - * numeric value. 
- * - */ - template T get() { - IF_CONSTEXPR(std::is_arithmetic::value) { - // Note: this->type() also converts the CSV value to float - if (this->type() <= DataType::CSV_STRING) { - throw std::runtime_error(internals::ERROR_NAN); - } + /** Indicate the last block of data has been parsed */ + void end_feed(); + + CONSTEXPR_17 ParseFlags parse_flag(const char ch) const noexcept { + return _parse_flags.data()[ch + 128]; } - IF_CONSTEXPR(std::is_integral::value) { - // Note: this->is_float() also converts the CSV value to float - if (this->is_float()) { - throw std::runtime_error(internals::ERROR_FLOAT_TO_INT); + CONSTEXPR_17 ParseFlags compound_parse_flag(const char ch) const noexcept { + return quote_escape_flag(parse_flag(ch), this->quote_escape); + } + + /** Whether or not this CSV has a UTF-8 byte order mark */ + CONSTEXPR bool utf8_bom() const { return this->_utf8_bom; } + + void set_output(RowCollection& rows) { this->_records = &rows; } + + protected: + /** @name Current Parser State */ + ///@{ + CSVRow current_row; + RawCSVDataPtr data_ptr = nullptr; + ColNamesPtr _col_names = nullptr; + CSVFieldList* fields = nullptr; + int field_start = UNINITIALIZED_FIELD; + size_t field_length = 0; + + /** An array where the (i + 128)th slot gives the ParseFlags for ASCII character i */ + ParseFlagMap _parse_flags; + ///@} + + /** @name Current Stream/File State */ + ///@{ + bool _eof = false; + + /** The size of the incoming CSV */ + size_t source_size = 0; + ///@} + + /** Whether or not source needs to be read in chunks */ + CONSTEXPR bool no_chunk() const { return this->source_size < ITERATION_CHUNK_SIZE; } + + /** Parse the current chunk of data * + * + * @returns How many character were read that are part of complete rows + */ + size_t parse(); + + /** Create a new RawCSVDataPtr for a new chunk of data */ + void reset_data_ptr(); + private: + /** An array where the (i + 128)th slot determines whether ASCII character i should + * be trimmed + */ + WhitespaceMap 
_ws_flags; + bool quote_escape = false; + bool field_has_double_quote = false; + + /** Where we are in the current data block */ + size_t data_pos = 0; + + /** Whether or not an attempt to find Unicode BOM has been made */ + bool unicode_bom_scan = false; + bool _utf8_bom = false; + + /** Where complete rows should be pushed to */ + RowCollection* _records = nullptr; + + CONSTEXPR_17 bool ws_flag(const char ch) const noexcept { + return _ws_flags.data()[ch + 128]; + } + + size_t& current_row_start() { + return this->current_row.data_start; + } + + void parse_field() noexcept; + + /** Finish parsing the current field */ + void push_field(); + + /** Finish parsing the current row */ + void push_row(); + + /** Handle possible Unicode byte order mark */ + void trim_utf8_bom(); + }; + + /** A class for parsing CSV data from a `std::stringstream` + * or an `std::ifstream` + */ + template + class StreamParser: public IBasicCSVParser { + using RowCollection = ThreadSafeDeque; + + public: + StreamParser(TStream& source, + const CSVFormat& format, + const ColNamesPtr& col_names = nullptr + ) : IBasicCSVParser(format, col_names), _source(std::move(source)) {}; + + StreamParser( + TStream& source, + internals::ParseFlagMap parse_flags, + internals::WhitespaceMap ws_flags) : + IBasicCSVParser(parse_flags, ws_flags), + _source(std::move(source)) + {}; + + ~StreamParser() {} + + void next(size_t bytes = ITERATION_CHUNK_SIZE) override { + if (this->eof()) return; + + this->reset_data_ptr(); + this->data_ptr->_data = std::make_shared(); + + if (source_size == 0) { + const auto start = _source.tellg(); + _source.seekg(0, std::ios::end); + const auto end = _source.tellg(); + _source.seekg(0, std::ios::beg); + + source_size = end - start; } - IF_CONSTEXPR(std::is_unsigned::value) { - if (this->value < 0) { - throw std::runtime_error(internals::ERROR_NEG_TO_UNSIGNED); - } + // Read data into buffer + size_t length = std::min(source_size - stream_pos, bytes); + std::unique_ptr buff(new 
char[length]); + _source.seekg(stream_pos, std::ios::beg); + _source.read(buff.get(), length); + stream_pos = _source.tellg(); + ((std::string*)(this->data_ptr->_data.get()))->assign(buff.get(), length); + + // Create string_view + this->data_ptr->data = *((std::string*)this->data_ptr->_data.get()); + + // Parse + this->current_row = CSVRow(this->data_ptr); + size_t remainder = this->parse(); + + if (stream_pos == source_size || no_chunk()) { + this->_eof = true; + this->end_feed(); + } + else { + this->stream_pos -= (length - remainder); } } - // Allow fallthrough from previous if branch - IF_CONSTEXPR(!std::is_floating_point::value) { - IF_CONSTEXPR(std::is_unsigned::value) { - // Quick hack to perform correct unsigned integer boundary checks - if (this->value > internals::get_uint_max()) { - throw std::runtime_error(internals::ERROR_OVERFLOW); - } - } - else if (internals::type_num() < this->_type) { - throw std::runtime_error(internals::ERROR_OVERFLOW); - } - } + private: + TStream _source; + size_t stream_pos = 0; + }; + + /** Parser for memory-mapped files + * + * @par Implementation + * This class constructs moving windows over a file to avoid + * creating massive memory maps which may require more RAM + * than the user has available. It contains logic to automatically + * re-align each memory map to the beginning of a CSV row. 
+ * + */ + class MmapParser : public IBasicCSVParser { + public: + MmapParser(csv::string_view filename, + const CSVFormat& format, + const ColNamesPtr& col_names = nullptr + ) : IBasicCSVParser(format, col_names) { + this->_filename = filename.data(); + this->source_size = get_file_size(filename); + }; + + ~MmapParser() {} + + void next(size_t bytes) override; + + private: + std::string _filename; + size_t mmap_pos = 0; + }; + } +} + + +/** The all encompassing namespace */ +namespace csv { + /** Stuff that is generally not of interest to end-users */ + namespace internals { + std::string format_row(const std::vector& row, csv::string_view delim = ", "); + + std::vector _get_col_names( csv::string_view head, const CSVFormat format = CSVFormat::guess_csv()); + + struct GuessScore { + double score; + size_t header; + }; + + CSV_INLINE GuessScore calculate_score(csv::string_view head, const CSVFormat& format); + + CSVGuessResult _guess_format(csv::string_view head, const std::vector& delims = { ',', '|', '\t', ';', '^', '~' }); + } + + std::vector get_col_names( + csv::string_view filename, + const CSVFormat format = CSVFormat::guess_csv()); - return static_cast(this->value); - } + /** Guess the delimiter used by a delimiter-separated values file */ + CSVGuessResult guess_format(csv::string_view filename, + const std::vector& delims = { ',', '|', '\t', ';', '^', '~' }); - /** Compares the contents of this field to a numeric value. If this - * field does not contain a numeric value, then all comparisons return - * false. - * - * @note Floating point values are considered equal if they are within - * `0.000001` of each other. 
+ /** @class CSVReader + * @brief Main class for parsing CSVs from files and in-memory sources + * + * All rows are compared to the column names for length consistency + * - By default, rows that are too short or too long are dropped + * - Custom behavior can be defined by overriding bad_row_handler in a subclass + */ + class CSVReader { + public: + /** + * An input iterator capable of handling large files. + * @note Created by CSVReader::begin() and CSVReader::end(). * - * @warning Multiple numeric comparisons involving the same field can - * be done more efficiently by calling the CSVField::get<>() method. + * @par Iterating over a file + * @snippet tests/test_csv_iterator.cpp CSVReader Iterator 1 * - * @sa csv::CSVField::operator==(const char * other) - * @sa csv::CSVField::operator==(csv::string_view other) + * @par Using with `` library + * @snippet tests/test_csv_iterator.cpp CSVReader Iterator 2 */ - template - bool operator==(T other) const - { - static_assert(std::is_arithmetic::value, - "T should be a numeric value."); - - if (this->_type != DataType::UNKNOWN) { - if (this->_type == DataType::CSV_STRING) { - return false; - } + class iterator { + public: + #ifndef DOXYGEN_SHOULD_SKIP_THIS + using value_type = CSVRow; + using difference_type = std::ptrdiff_t; + using pointer = CSVRow * ; + using reference = CSVRow & ; + using iterator_category = std::input_iterator_tag; + #endif - return internals::is_equal(value, static_cast(other), 0.000001L); - } + iterator() = default; + iterator(CSVReader* reader) : daddy(reader) {}; + iterator(CSVReader*, CSVRow&&); - long double out = 0; - if (internals::data_type(this->sv, &out) == DataType::CSV_STRING) { - return false; - } + /** Access the CSVRow held by the iterator */ + CONSTEXPR_14 reference operator*() { return this->row; } - return internals::is_equal(out, static_cast(other), 0.000001L); - } + /** Return a pointer to the CSVRow the iterator has stopped at */ + CONSTEXPR_14 pointer operator->() { return 
&(this->row); } - /** Return a string view over the field's contents */ - CONSTEXPR csv::string_view get_sv() const { return this->sv; } + iterator& operator++(); /**< Pre-increment iterator */ + iterator operator++(int); /**< Post-increment iterator */ - /** Returns true if field is an empty string or string of whitespace characters */ - CONSTEXPR bool is_null() { return type() == DataType::CSV_NULL; } + /** Returns true if iterators were constructed from the same CSVReader + * and point to the same row + */ + CONSTEXPR bool operator==(const iterator& other) const noexcept { + return (this->daddy == other.daddy) && (this->i == other.i); + } - /** Returns true if field is a non-numeric, non-empty string */ - CONSTEXPR bool is_str() { return type() == DataType::CSV_STRING; } + CONSTEXPR bool operator!=(const iterator& other) const noexcept { return !operator==(other); } + private: + CSVReader * daddy = nullptr; // Pointer to parent + CSVRow row; // Current row + size_t i = 0; // Index of current row + }; - /** Returns true if field is an integer or float */ - CONSTEXPR bool is_num() { return type() >= DataType::CSV_INT8; } + /** @name Constructors + * Constructors for iterating over large files and parsing in-memory sources. + */ + ///@{ + CSVReader(csv::string_view filename, CSVFormat format = CSVFormat::guess_csv()); - /** Returns true if field is an integer */ - CONSTEXPR bool is_int() { - return (type() >= DataType::CSV_INT8) && (type() <= DataType::CSV_INT64); - } + /** Allows parsing stream sources such as `std::stringstream` or `std::ifstream` + * + * @tparam TStream An input stream deriving from `std::istream` + * @note Currently this constructor requires special CSV dialects to be manually + * specified. 
+ */ + template::value, int> = 0> + CSVReader(TStream& source, CSVFormat format = CSVFormat()) : _format(format) { + using Parser = internals::StreamParser; - /** Returns true if field is a floating point value */ - CONSTEXPR bool is_float() { return type() == DataType::CSV_DOUBLE; }; + if (!format.col_names.empty()) + this->set_col_names(format.col_names); - /** Return the type of the underlying CSV data */ - CONSTEXPR DataType type() { - this->get_value(); - return _type; + this->parser = std::unique_ptr( + new Parser(source, format, col_names)); // For C++11 + this->initial_read(); } + ///@} - private: - long double value = 0; /**< Cached numeric value */ - csv::string_view sv = ""; /**< A pointer to this field's text */ - DataType _type = DataType::UNKNOWN; /**< Cached data type value */ - CONSTEXPR void get_value() { - /* Check to see if value has been cached previously, if not - * evaluate it - */ - if ((int)_type < 0) { - this->_type = internals::data_type(this->sv, &this->value); + CSVReader(const CSVReader&) = delete; // No copy constructor + CSVReader(CSVReader&&) = default; // Move constructor + CSVReader& operator=(const CSVReader&) = delete; // No copy assignment + CSVReader& operator=(CSVReader&& other) = default; + ~CSVReader() { + if (this->read_csv_worker.joinable()) { + this->read_csv_worker.join(); } } - }; - - /** Data structure for representing CSV rows */ - class CSVRow { - public: - friend BasicCSVParser; - - CSVRow() = default; - - /** Construct a CSVRow from a RawCSVDataPtr */ - CSVRow(RawCSVDataPtr _data) : data(_data) {} - /** Indicates whether row is empty or not */ - CONSTEXPR bool empty() const { return this->size() == 0; } + /** @name Retrieving CSV Rows */ + ///@{ + bool read_row(CSVRow &row); + iterator begin(); + HEDLEY_CONST iterator end() const noexcept; - /** Return the number of fields in this row */ - CONSTEXPR size_t size() const { return row_length; } + /** Returns true if we have reached end of file */ + bool eof() const 
noexcept { return this->parser->eof(); }; + ///@} - /** @name Value Retrieval */ + /** @name CSV Metadata */ ///@{ - CSVField operator[](size_t n) const; - CSVField operator[](const std::string&) const; - csv::string_view get_field(size_t index) const; - std::string to_json(const std::vector& subset = {}) const; - std::string to_json_array(const std::vector& subset = {}) const; - std::vector get_col_names() const { - return this->data->col_names->get_col_names(); - } - - /** Convert this CSVRow into a vector of strings. - * **Note**: This is a less efficient method of - * accessing data than using the [] operator. - */ - operator std::vector() const; + CSVFormat get_format() const; + std::vector get_col_names() const; + int index_of(csv::string_view col_name) const; ///@} - /** A random access iterator over the contents of a CSV row. - * Each iterator points to a CSVField. + /** @name CSV Metadata: Attributes */ + ///@{ + /** Whether or not the file or stream contains valid CSV rows, + * not including the header. + * + * @note Gives an accurate answer regardless of when it is called. 
+ * */ - class iterator { - public: -#ifndef DOXYGEN_SHOULD_SKIP_THIS - using value_type = CSVField; - using difference_type = int; + CONSTEXPR bool empty() const noexcept { return this->n_rows() == 0; } - // Using CSVField * as pointer type causes segfaults in MSVC debug builds - // but using shared_ptr as pointer type won't compile in g++ -#ifdef _MSC_BUILD - using pointer = std::shared_ptr; -#else - using pointer = CSVField * ; -#endif + /** Retrieves the number of rows that have been read so far */ + CONSTEXPR size_t n_rows() const noexcept { return this->_n_rows; } - using reference = CSVField & ; - using iterator_category = std::random_access_iterator_tag; -#endif - iterator(const CSVRow*, int i); + /** Whether or not CSV was prefixed with a UTF-8 bom */ + bool utf8_bom() const noexcept { return this->parser->utf8_bom(); } + ///@} - reference operator*() const; - pointer operator->() const; + protected: + /** + * \defgroup csv_internal CSV Parser Internals + * @brief Internals of CSVReader. Only maintainers and those looking to + * extend the parser should read this. 
+ * @{ + */ - iterator operator++(int); - iterator& operator++(); - iterator operator--(int); - iterator& operator--(); - iterator operator+(difference_type n) const; - iterator operator-(difference_type n) const; + /** Sets this reader's column names and associated data */ + void set_col_names(const std::vector&); - /** Two iterators are equal if they point to the same field */ - constexpr bool operator==(const iterator& other) const { - return this->i == other.i; - }; + /** @name CSV Settings **/ + ///@{ + CSVFormat _format; + ///@} - constexpr bool operator!=(const iterator& other) const { return !operator==(other); } + /** @name Parser State */ + ///@{ + /** Pointer to a object containing column information */ + internals::ColNamesPtr col_names = std::make_shared(); -#ifndef NDEBUG - friend CSVRow; -#endif + /** Helper class which actually does the parsing */ + std::unique_ptr parser = nullptr; - private: - const CSVRow * daddy = nullptr; // Pointer to parent - std::shared_ptr field = nullptr; // Current field pointed at - int i = 0; // Index of current field - }; + /** Queue of parsed CSV rows */ + std::unique_ptr records{new RowCollection(100)}; - /** A reverse iterator over the contents of a CSVRow. */ - using reverse_iterator = std::reverse_iterator; + size_t n_cols = 0; /**< The number of columns in this CSV */ + size_t _n_rows = 0; /**< How many rows (minus header) have been read so far */ - /** @name Iterators - * @brief Each iterator points to a CSVField object. 
- */ - ///@{ - iterator begin() const; - iterator end() const; - reverse_iterator rbegin() const; - reverse_iterator rend() const; + /** @name Multi-Threaded File Reading Functions */ + ///@{ + bool read_csv(size_t bytes = internals::ITERATION_CHUNK_SIZE); ///@} + /**@}*/ + private: - RawCSVDataPtr data; + /** Whether or not rows before header were trimmed */ + bool header_trimmed = false; - /** Where in RawCSVData.data we start */ - size_t data_start = 0; + /** @name Multi-Threaded File Reading: Flags and State */ + ///@{ + std::thread read_csv_worker; /**< Worker thread for read_csv() */ + ///@} - /** Where in the RawCSVDataPtr.fields array we start */ - size_t field_bounds_index = 0; + /** Read initial chunk to get metadata */ + void initial_read() { + this->read_csv_worker = std::thread(&CSVReader::read_csv, this, internals::ITERATION_CHUNK_SIZE); + this->read_csv_worker.join(); + } - /** How many columns this row spans */ - size_t row_length = 0; + void trim_header(); }; +} -#ifdef _MSC_VER -#pragma region CSVField::get Specializations -#endif - /** Retrieve this field's original string */ - template<> - inline std::string CSVField::get() { - return std::string(this->sv); - } +/** @file + * Calculates statistics from CSV files + */ - /** Retrieve a view over this field's string +#include +#include +#include + +namespace csv { + /** Class for calculating statistics from CSV files and in-memory sources + * + * **Example** + * \include programs/csv_stats.cpp * - * @warning This string_view is only guaranteed to be valid as long as this - * CSVRow is still alive. 
*/ - template<> - CONSTEXPR csv::string_view CSVField::get() { - return this->sv; - } + class CSVStat { + public: + using FreqCount = std::unordered_map; + using TypeCount = std::unordered_map; - /** Retrieve this field's value as a long double */ - template<> - CONSTEXPR long double CSVField::get() { - if (!is_num()) - throw std::runtime_error(internals::ERROR_NAN); + std::vector get_mean() const; + std::vector get_variance() const; + std::vector get_mins() const; + std::vector get_maxes() const; + std::vector get_counts() const; + std::vector get_dtypes() const; - return this->value; - } -#ifdef _MSC_VER -#pragma endregion CSVField::get Specializations -#endif + std::vector get_col_names() const { + return this->reader.get_col_names(); + } - /** Compares the contents of this field to a string */ - template<> - inline bool CSVField::operator==(const char * other) const - { - return this->sv == other; - } + CSVStat(csv::string_view filename, CSVFormat format = CSVFormat::guess_csv()); + CSVStat(std::stringstream& source, CSVFormat format = CSVFormat()); + private: + // An array of rolling averages + // Each index corresponds to the rolling mean for the column at said index + std::vector rolling_means; + std::vector rolling_vars; + std::vector mins; + std::vector maxes; + std::vector counts; + std::vector dtypes; + std::vector n; - /** Compares the contents of this field to a string */ - template<> - inline bool CSVField::operator==(csv::string_view other) const - { - return this->sv == other; - } -} + // Statistic calculators + void variance(const long double&, const size_t&); + void count(CSVField&, const size_t&); + void min_max(const long double&, const size_t&); + void dtype(CSVField&, const size_t&); -inline std::ostream& operator << (std::ostream& os, csv::CSVField const& value) { - os << std::string(value); - return os; + void calc(); + void calc_chunk(); + void calc_worker(const size_t&); + + CSVReader reader; + std::deque records = {}; + }; } +#include 
+#include +#include namespace csv { - namespace internals { - /** @typedef ParseFlags - * An enum used for describing the significance of each character - * with respect to CSV parsing - */ - enum class ParseFlags { - NOT_SPECIAL, /**< Characters with no special meaning */ - QUOTE, /**< Characters which may signify a quote escape */ - DELIMITER, /**< Characters which may signify a new field */ - NEWLINE /**< Characters which may signify a new row */ - }; + /** Returned by get_file_info() */ + struct CSVFileInfo { + std::string filename; /**< Filename */ + std::vector col_names; /**< CSV column names */ + char delim; /**< Delimiting character */ + size_t n_rows; /**< Number of rows in a file */ + size_t n_cols; /**< Number of columns in a CSV */ + }; - using ParseFlagMap = std::array; - using WhitespaceMap = std::array; - } + /** @name Shorthand Parsing Functions + * @brief Convienience functions for parsing small strings + */ + ///@{ + CSVReader operator ""_csv(const char*, size_t); + CSVReader operator ""_csv_no_header(const char*, size_t); + CSVReader parse(csv::string_view in, CSVFormat format = CSVFormat()); + CSVReader parse_no_header(csv::string_view in); + ///@} - /** A class for parsing raw CSV data */ - class BasicCSVParser { - public: - BasicCSVParser() = default; - BasicCSVParser(internals::ColNamesPtr _col_names) : col_names(_col_names) {}; - BasicCSVParser(internals::ParseFlagMap parse_flags, internals::WhitespaceMap ws_flags) : - _parse_flags(parse_flags), _ws_flags(ws_flags) {}; + /** @name Utility Functions */ + ///@{ + std::unordered_map csv_data_types(const std::string&); + CSVFileInfo get_file_info(const std::string& filename); + int get_col_pos(csv::string_view filename, csv::string_view col_name, + const CSVFormat& format = CSVFormat::guess_csv()); + ///@} +} +/** @file + * A standalone header file for writing delimiter-separated files + */ - void parse(csv::string_view in, std::deque& records); - void end_feed(std::deque& records) { - using 
internals::ParseFlags; +#include +#include +#include +#include +#include +#include - bool empty_last_field = this->current_row.data - && !this->current_row.data->data.empty() - && parse_flag(this->current_row.data->data.back()) == ParseFlags::DELIMITER; - if (this->field_length > 0 || empty_last_field) { - this->push_field(); - } +namespace csv { + namespace internals { + static int DECIMAL_PLACES = 5; - if (this->current_row.size() > 0) { - this->push_row(records); - } + /** + * Calculate the absolute value of a number + */ + template + inline T csv_abs(T x) { + return abs(x); } - void set_parse_flags(internals::ParseFlagMap parse_flags) { - _parse_flags = parse_flags; + template<> + inline int csv_abs(int x) { + return abs(x); } - void set_ws_flags(internals::WhitespaceMap ws_flags) { - _ws_flags = ws_flags; + template<> + inline long int csv_abs(long int x) { + return labs(x); } - private: - CONSTEXPR internals::ParseFlags parse_flag(const char ch) const { - return _parse_flags.data()[ch + 128]; + template<> + inline long long int csv_abs(long long int x) { + return llabs(x); } - CONSTEXPR bool ws_flag(const char ch) const { - return _ws_flags.data()[ch + 128]; + template<> + inline float csv_abs(float x) { + return fabsf(x); } - void push_field(); - CONSTEXPR void parse_field(csv::string_view in, size_t& i, const size_t& current_row_start, bool quote_escape = false); - - void parse_loop(csv::string_view in); - - void push_row(std::deque& records) { - current_row.row_length = current_row.data->fields.size() - current_row.field_bounds_index; - records.push_back(std::move(current_row)); - }; - - void set_data_ptr(RawCSVDataPtr ptr) { - this->data_ptr = ptr; - this->fields = &(ptr->fields); + template<> + inline double csv_abs(double x) { + return fabs(x); } - /** An array where the (i + 128)th slot gives the ParseFlags for ASCII character i */ - internals::ParseFlagMap _parse_flags; + template<> + inline long double csv_abs(long double x) { + return fabsl(x); + } 
- /** An array where the (i + 128)th slot determines whether ASCII character i should - * be trimmed + /** + * Calculate the number of digits in a number */ - internals::WhitespaceMap _ws_flags; - - internals::ColNamesPtr col_names = nullptr; + template< + typename T, + csv::enable_if_t::value, int> = 0 + > + int num_digits(T x) + { + x = csv_abs(x); - CSVRow current_row; - int field_start = -1; - size_t field_length = 0; - bool field_has_double_quote = false; + int digits = 0; - RawCSVDataPtr data_ptr = nullptr; - internals::CSVFieldArray* fields = nullptr; + while (x >= 1) { + x /= 10; + digits++; + } - std::deque* _records = nullptr; - }; -} + return digits; + } -namespace csv { - namespace internals { - /** A string buffer and its size. Consumed by read_csv_worker(). */ - using WorkItem = std::pair, size_t>; + /** to_string() for unsigned integers */ + template::value, int> = 0> + inline std::string to_string(T value) { + std::string digits_reverse = ""; - /** Create a vector v where each index i corresponds to the - * ASCII number for a character and, v[i + 128] labels it according to - * the CSVReader::ParseFlags enum - */ - HEDLEY_CONST CONSTEXPR ParseFlagMap make_parse_flags(char delimiter) { - std::array ret = {}; - for (int i = -128; i < 128; i++) { - const int arr_idx = i + 128; - char ch = char(i); + if (value == 0) return "0"; - if (ch == delimiter) - ret[arr_idx] = ParseFlags::DELIMITER; - else if (ch == '\r' || ch == '\n') - ret[arr_idx] = ParseFlags::NEWLINE; - else - ret[arr_idx] = ParseFlags::NOT_SPECIAL; + while (value > 0) { + digits_reverse += (char)('0' + (value % 10)); + value /= 10; } - return ret; + return std::string(digits_reverse.rbegin(), digits_reverse.rend()); } - /** Create a vector v where each index i corresponds to the - * ASCII number for a character and, v[i + 128] labels it according to - * the CSVReader::ParseFlags enum - */ - HEDLEY_CONST CONSTEXPR ParseFlagMap make_parse_flags(char delimiter, char quote_char) { - std::array 
ret = make_parse_flags(delimiter); - ret[(size_t)quote_char + 128] = ParseFlags::QUOTE; - return ret; + /** to_string() for signed integers */ + template< + typename T, + csv::enable_if_t::value && std::is_signed::value, int> = 0 + > + inline std::string to_string(T value) { + if (value >= 0) + return to_string((size_t)value); + + return "-" + to_string((size_t)(value * -1)); } - /** Create a vector v where each index i corresponds to the - * ASCII number for a character c and, v[i + 128] is true if - * c is a whitespace character - */ - HEDLEY_CONST CONSTEXPR WhitespaceMap make_ws_flags(const char * ws_chars, size_t n_chars) { - std::array ret = {}; - for (int i = -128; i < 128; i++) { - const int arr_idx = i + 128; - char ch = char(i); - ret[arr_idx] = false; + /** to_string() for floating point numbers */ + template< + typename T, + csv::enable_if_t::value, int> = 0 + > + inline std::string to_string(T value) { +#ifdef __clang__ + return std::to_string(value); +#else + // TODO: Figure out why the below code doesn't work on clang + std::string result = ""; - for (size_t j = 0; j < n_chars; j++) { - if (ws_chars[j] == ch) { - ret[arr_idx] = true; + T integral_part; + T fractional_part = std::abs(std::modf(value, &integral_part)); + integral_part = std::abs(integral_part); + + // Integral part + if (value < 0) result = "-"; + + if (integral_part == 0) { + result += "0"; + } + else { + for (int n_digits = num_digits(integral_part); n_digits > 0; n_digits --) { + int digit = (int)(std::fmod(integral_part, pow10(n_digits)) / pow10(n_digits - 1)); + result += (char)('0' + digit); } } - } - return ret; + // Decimal part + result += "."; + + if (fractional_part > 0) { + fractional_part *= (T)(pow10(DECIMAL_PLACES)); + for (int n_digits = DECIMAL_PLACES; n_digits > 0; n_digits--) { + int digit = (int)(std::fmod(fractional_part, pow10(n_digits)) / pow10(n_digits - 1)); + result += (char)('0' + digit); + } + } + else { + result += "0"; + } + + return result; +#endif } + } - 
struct GuessScore { - double score; - size_t header; - }; + /** Sets how many places after the decimal will be written for floating point numbers + * + * @param precision Number of decimal places + */ +#ifndef __clang___ + inline static void set_decimal_places(int precision) { + internals::DECIMAL_PLACES = precision; + } +#endif + + /** @name CSV Writing */ + ///@{ + /** + * Class for writing delimiter separated values files + * + * To write formatted strings, one should + * -# Initialize a DelimWriter with respect to some output stream + * -# Call write_row() on std::vectors of unformatted text + * + * @tparam OutputStream The output stream, e.g. `std::ofstream`, `std::stringstream` + * @tparam Delim The delimiter character + * @tparam Quote The quote character + * @tparam Flush True: flush after every writing function, + * false: you need to flush explicitly if needed. + * In both cases the destructor will flush. + * + * @par Hint + * Use the aliases csv::CSVWriter to write CSV + * formatted strings and csv::TSVWriter + * to write tab separated strings + * + * @par Example w/ std::vector, std::deque, std::list + * @snippet test_write_csv.cpp CSV Writer Example + * + * @par Example w/ std::tuple + * @snippet test_write_csv.cpp CSV Writer Tuple Example + */ + template + class DelimWriter { + public: + /** Construct a DelimWriter over the specified output stream + * + * @param _out Stream to write to + * @param _quote_minimal Limit field quoting to only when necessary + */ - CSV_INLINE GuessScore calculate_score(csv::string_view head, CSVFormat format); + DelimWriter(OutputStream& _out, bool _quote_minimal = true) + : out(_out), quote_minimal(_quote_minimal) {}; - CSVGuessResult _guess_format(csv::string_view head, const std::vector& delims = { ',', '|', '\t', ';', '^', '~' }); + /** Construct a DelimWriter over the file + * + * @param[out] filename File to write to + */ + DelimWriter(const std::string& filename) : DelimWriter(std::ifstream(filename)) {}; - /** Read 
the first 500KB of a CSV file */ - CSV_INLINE std::string get_csv_head(csv::string_view filename); - } -} + /** Destructor will flush remaining data + * + */ + ~DelimWriter() { + out.flush(); + } -/** The all encompassing namespace */ -namespace csv { - /** Stuff that is generally not of interest to end-users */ - namespace internals { - std::string format_row(const std::vector& row, csv::string_view delim = ", "); + /** Format a sequence of strings and write to CSV according to RFC 4180 + * + * @warning This does not check to make sure row lengths are consistent + * + * @param[in] record Sequence of strings to be formatted + * + * @return The current DelimWriter instance (allowing for operator chaining) + */ + template + DelimWriter& operator<<(const std::array& record) { + for (size_t i = 0; i < Size; i++) { + out << csv_escape(record[i]); + if (i + 1 != Size) out << Delim; + } - std::vector _get_col_names( csv::string_view head, const CSVFormat format = CSVFormat::guess_csv()); - } + end_out(); + return *this; + } - std::vector get_col_names( - csv::string_view filename, - const CSVFormat format = CSVFormat::guess_csv()); + /** @copydoc operator<< */ + template + DelimWriter& operator<<(const std::tuple& record) { + this->write_tuple<0, T...>(record); + return *this; + } - /** Guess the delimiter used by a delimiter-separated values file */ - CSVGuessResult guess_format(csv::string_view filename, - const std::vector& delims = { ',', '|', '\t', ';', '^', '~' }); + /** + * @tparam T A container such as std::vector, std::deque, or std::list + * + * @copydoc operator<< + */ + template< + typename T, typename Alloc, template class Container, + + // Avoid conflicting with tuples with two elements + csv::enable_if_t::value, int> = 0 + > + DelimWriter& operator<<(const Container& record) { + const size_t ilen = record.size(); + size_t i = 0; + for (const auto& field : record) { + out << csv_escape(field); + if (i + 1 != ilen) out << Delim; + i++; + } + + end_out(); + 
return *this; + } - /** @class CSVReader - * @brief Main class for parsing CSVs from files and in-memory sources - * - * All rows are compared to the column names for length consistency - * - By default, rows that are too short or too long are dropped - * - Custom behavior can be defined by overriding bad_row_handler in a subclass - */ - class CSVReader { - public: - /** - * An input iterator capable of handling large files. - * @note Created by CSVReader::begin() and CSVReader::end(). + /** Flushes the written data * - * @par Iterating over a file - * @snippet tests/test_csv_iterator.cpp CSVReader Iterator 1 - * - * @par Using with `` library - * @snippet tests/test_csv_iterator.cpp CSVReader Iterator 2 */ - class iterator { - public: - #ifndef DOXYGEN_SHOULD_SKIP_THIS - using value_type = CSVRow; - using difference_type = std::ptrdiff_t; - using pointer = CSVRow * ; - using reference = CSVRow & ; - using iterator_category = std::input_iterator_tag; - #endif + void flush() { + out.flush(); + } - iterator() = default; - iterator(CSVReader* reader) : daddy(reader) {}; - iterator(CSVReader*, CSVRow&&); + private: + template< + typename T, + csv::enable_if_t< + !std::is_convertible::value + && !std::is_convertible::value + , int> = 0 + > + std::string csv_escape(T in) { + return internals::to_string(in); + } - /** Access the CSVRow held by the iterator */ - CONSTEXPR reference operator*() { return this->row; } + template< + typename T, + csv::enable_if_t< + std::is_convertible::value + || std::is_convertible::value + , int> = 0 + > + std::string csv_escape(T in) { + IF_CONSTEXPR(std::is_convertible::value) { + return _csv_escape(in); + } + + return _csv_escape(std::string(in)); + } - /** Return a pointer to the CSVRow the iterator has stopped at */ - CONSTEXPR pointer operator->() { return &(this->row); } + std::string _csv_escape(csv::string_view in) { + /** Format a string to be RFC 4180-compliant + * @param[in] in String to be CSV-formatted + * @param[out] 
quote_minimal Only quote fields if necessary. + * If False, everything is quoted. + */ - iterator& operator++(); /**< Pre-increment iterator */ - iterator operator++(int); /**< Post-increment ierator */ - iterator& operator--(); + // Do we need a quote escape + bool quote_escape = false; - /** Returns true if iterators were constructed from the same CSVReader - * and point to the same row - */ - CONSTEXPR bool operator==(const iterator& other) const { - return (this->daddy == other.daddy) && (this->i == other.i); + for (auto ch : in) { + if (ch == Quote || ch == Delim || ch == '\r' || ch == '\n') { + quote_escape = true; + break; + } } - CONSTEXPR bool operator!=(const iterator& other) const { return !operator==(other); } - private: - CSVReader * daddy = nullptr; // Pointer to parent - CSVRow row; // Current row - RowCount i = 0; // Index of current row - }; + if (!quote_escape) { + if (quote_minimal) return std::string(in); + else { + std::string ret(1, Quote); + ret += in.data(); + ret += Quote; + return ret; + } + } - /** @name Constructors - * Constructors for iterating over large files and parsing in-memory sources. - */ - ///@{ - CSVReader(csv::string_view filename, CSVFormat format = CSVFormat::guess_csv()); - CSVReader(CSVFormat format = CSVFormat()); - ///@} + // Start initial quote escape sequence + std::string ret(1, Quote); + for (auto ch: in) { + if (ch == Quote) ret += std::string(2, Quote); + else ret += ch; + } - CSVReader(const CSVReader&) = delete; // No copy constructor - CSVReader(CSVReader&&) = default; // Move constructor - CSVReader& operator=(const CSVReader&) = delete; // No copy assignment - CSVReader& operator=(CSVReader&& other) = default; + // Finish off quote escape + ret += Quote; + return ret; + } - /** @name Reading In-Memory Strings - * You can piece together incomplete CSV fragments by calling feed() on them - * before finally calling end_feed(). 
- * - * Alternatively, you can also use the parse() shorthand function for - * smaller strings. - */ - ///@{ - void feed(csv::string_view in); - void end_feed(); - ///@} + /** Recurisve template for writing std::tuples */ + template + typename std::enable_if::type write_tuple(const std::tuple& record) { + out << csv_escape(std::get(record)); - /** @name Retrieving CSV Rows */ - ///@{ - bool read_row(CSVRow &row); - iterator begin(); - HEDLEY_CONST iterator end() const; - ///@} + IF_CONSTEXPR (Index + 1 < sizeof...(T)) out << Delim; - /** @name CSV Metadata */ - ///@{ - CSVFormat get_format() const; - std::vector get_col_names() const; - int index_of(csv::string_view col_name) const; - ///@} + this->write_tuple(record); + } - /** @name CSV Metadata: Attributes */ - ///@{ - bool empty() const { return this->size() == 0; } - RowCount size() const { return this->num_rows; } - bool utf8_bom() const { return this->_utf8_bom; } - ///@} + /** Base case for writing std::tuples */ + template + typename std::enable_if::type write_tuple(const std::tuple& record) { + (void)record; + end_out(); + } - protected: - /** - * \defgroup csv_internal CSV Parser Internals - * @brief Internals of CSVReader. Only maintainers and those looking to - * extend the parser should read this. - * @{ - */ + /** Ends a line in 'out' and flushes, if Flush is true.*/ + void end_out() { + out << '\n'; + IF_CONSTEXPR(Flush) out.flush(); + } - /** Multi-threaded Reading State, including synchronization objects that cannot be moved. */ - struct ThreadedReadingState { - std::deque feed_buffer; /**< Message queue for worker */ - std::mutex feed_lock; /**< Allow only one worker to write */ - std::condition_variable feed_cond; /**< Wake up worker */ - }; + OutputStream & out; + bool quote_minimal; + }; - /** Open a file for reading. 
*/ - void fopen(csv::string_view filename); + /** An alias for csv::DelimWriter for writing standard CSV files + * + * @sa csv::DelimWriter::operator<<() + * + * @note Use `csv::make_csv_writer()` to in instatiate this class over + * an actual output stream. + */ + template + using CSVWriter = DelimWriter; - size_t file_size; + /** Class for writing tab-separated values files + * + * @sa csv::DelimWriter::write_row() + * @sa csv::DelimWriter::operator<<() + * + * @note Use `csv::make_tsv_writer()` to in instatiate this class over + * an actual output stream. + */ + template + using TSVWriter = DelimWriter; - /** Sets this reader's column names and associated data */ - void set_col_names(const std::vector&); + /** Return a csv::CSVWriter over the output stream */ + template + inline CSVWriter make_csv_writer(OutputStream& out, bool quote_minimal=true) { + return CSVWriter(out, quote_minimal); + } - /** Returns true if we have reached end of file */ - bool eof() { return this->csv_mmap_eof; }; + /** Return a buffered csv::CSVWriter over the output stream (does not auto flush) */ + template + inline CSVWriter make_csv_writer_buffered(OutputStream& out, bool quote_minimal=true) { + return CSVWriter(out, quote_minimal); + } - /** @name CSV Settings **/ - ///@{ - CSVFormat _format; - ///@} + /** Return a csv::TSVWriter over the output stream */ + template + inline TSVWriter make_tsv_writer(OutputStream& out, bool quote_minimal=true) { + return TSVWriter(out, quote_minimal); + } - /** @name Parser State */ - ///@{ - /** Pointer to a object containing column information */ - internals::ColNamesPtr col_names = std::make_shared(); + /** Return a buffered csv::TSVWriter over the output stream (does not auto flush) */ + template + inline TSVWriter make_tsv_writer_buffered(OutputStream& out, bool quote_minimal=true) { + return TSVWriter(out, quote_minimal); + } + ///@} +} - // TODO: Update description - /** Buffer for current row being parsed */ - BasicCSVParser parser = 
BasicCSVParser(this->col_names); - /** Queue of parsed CSV rows */ - std::deque records; +namespace csv { + namespace internals { + CSV_INLINE size_t get_file_size(csv::string_view filename) { + std::ifstream infile(std::string(filename), std::ios::binary); + const auto start = infile.tellg(); + infile.seekg(0, std::ios::end); + const auto end = infile.tellg(); - /** Whether or not an attempt to find Unicode BOM has been made */ - bool unicode_bom_scan = false; + return end - start; + } - /** Whether or not rows before header were trimmed */ - bool header_trimmed = false; + CSV_INLINE std::string get_csv_head(csv::string_view filename) { + return get_csv_head(filename, get_file_size(filename)); + } - /** The number of columns in this CSV */ - size_t n_cols = 0; + CSV_INLINE std::string get_csv_head(csv::string_view filename, size_t file_size) { + const size_t bytes = 500000; - /** How many rows (minus header) have been parsed so far */ - RowCount num_rows = 0; + std::error_code error; + size_t length = std::min((size_t)file_size, bytes); + auto mmap = mio::make_mmap_source(std::string(filename), 0, length, error); - /** Set to true if UTF-8 BOM was detected */ - bool _utf8_bom = false; - ///@} + if (error) { + throw std::runtime_error("Cannot open file " + std::string(filename)); + } - /** @name Multi-Threaded File Reading Functions */ - ///@{ - void feed(internals::WorkItem&&); /**< @brief Helper for read_csv_worker() */ - void read_csv(const size_t& bytes = internals::ITERATION_CHUNK_SIZE); + return std::string(mmap.begin(), mmap.end()); + } - size_t relative_mmap_pos = 0; +#ifdef _MSC_VER +#pragma region IBasicCVParser +#endif + CSV_INLINE IBasicCSVParser::IBasicCSVParser( + const CSVFormat& format, + const ColNamesPtr& col_names + ) : _col_names(col_names) { + if (format.no_quote) { + _parse_flags = internals::make_parse_flags(format.get_delim()); + } + else { + _parse_flags = internals::make_parse_flags(format.get_delim(), format.quote_char); + } - void 
read_csv_worker(); - std::string _filename = ""; - ///@} + _ws_flags = internals::make_ws_flags( + format.trim_chars.data(), format.trim_chars.size() + ); + } - /** @name Multi-Threaded File Reading: Flags and State */ - ///@{ - mio::mmap_source csv_mmap; - bool csv_mmap_eof = true; - size_t csv_mmap_pos = 0; - std::unique_ptr feed_state; - ///@} + CSV_INLINE void IBasicCSVParser::end_feed() { + using internals::ParseFlags; - /**@}*/ // End of parser internals + bool empty_last_field = this->data_ptr + && this->data_ptr->_data + && !this->data_ptr->data.empty() + && (parse_flag(this->data_ptr->data.back()) == ParseFlags::DELIMITER + || parse_flag(this->data_ptr->data.back()) == ParseFlags::QUOTE); - private: - /** Set parse and whitespace flags */ - void set_parse_flags(const CSVFormat& format); - }; -} -/** @file - * Calculates statistics from CSV files - */ + // Push field + if (this->field_length > 0 || empty_last_field) { + this->push_field(); + } -#include -#include + // Push row + if (this->current_row.size() > 0) + this->push_row(); + } -namespace csv { - /** Class for calculating statistics from CSV files and in-memory sources - * - * **Example** - * \include programs/csv_stats.cpp - * - */ - class CSVStat : public CSVReader { - public: - using FreqCount = std::unordered_map; - using TypeCount = std::unordered_map; + CSV_INLINE void IBasicCSVParser::parse_field() noexcept { + using internals::ParseFlags; + auto& in = this->data_ptr->data; - void end_feed(); - std::vector get_mean() const; - std::vector get_variance() const; - std::vector get_mins() const; - std::vector get_maxes() const; - std::vector get_counts() const; - std::vector get_dtypes() const; + // Trim off leading whitespace + while (data_pos < in.size() && ws_flag(in[data_pos])) + data_pos++; - CSVStat(csv::string_view filename, CSVFormat format = CSVFormat::guess_csv()); - CSVStat(CSVFormat format = CSVFormat()) : CSVReader(format) {}; - private: - // An array of rolling averages - // Each 
index corresponds to the rolling mean for the column at said index - std::vector rolling_means; - std::vector rolling_vars; - std::vector mins; - std::vector maxes; - std::vector counts; - std::vector dtypes; - std::vector n; + if (field_start == UNINITIALIZED_FIELD) + field_start = (int)(data_pos - current_row_start()); - // Statistic calculators - void variance(const long double&, const size_t&); - void count(CSVField&, const size_t&); - void min_max(const long double&, const size_t&); - void dtype(CSVField&, const size_t&); + // Optimization: Since NOT_SPECIAL characters tend to occur in contiguous + // sequences, use the loop below to avoid having to go through the outer + // switch statement as much as possible + while (data_pos < in.size() && compound_parse_flag(in[data_pos]) == ParseFlags::NOT_SPECIAL) + data_pos++; - void calc(); - void calc_worker(const size_t&); - }; -} + field_length = data_pos - (field_start + current_row_start()); -#include -#include -#include + // Trim off trailing whitespace, this->field_length constraint matters + // when field is entirely whitespace + for (size_t j = data_pos - 1; ws_flag(in[j]) && this->field_length > 0; j--) + this->field_length--; + } -namespace csv { - /** Returned by get_file_info() */ - struct CSVFileInfo { - std::string filename; /**< Filename */ - std::vector col_names; /**< CSV column names */ - char delim; /**< Delimiting character */ - RowCount n_rows; /**< Number of rows in a file */ - int n_cols; /**< Number of columns in a CSV */ - }; + CSV_INLINE void IBasicCSVParser::push_field() + { + // Update + if (field_has_double_quote) { + fields->emplace_back( + field_start == UNINITIALIZED_FIELD ? 
0 : (unsigned int)field_start, + field_length, + true + ); + field_has_double_quote = false; - /** @name Shorthand Parsing Functions - * @brief Convienience functions for parsing small strings - */ - ///@{ - CSVReader operator ""_csv(const char*, size_t); - CSVReader parse(csv::string_view in, CSVFormat format = CSVFormat()); - ///@} + } + else { + fields->emplace_back( + field_start == UNINITIALIZED_FIELD ? 0 : (unsigned int)field_start, + field_length + ); + } - /** @name Utility Functions */ - ///@{ - std::unordered_map csv_data_types(const std::string&); - CSVFileInfo get_file_info(const std::string& filename); - int get_col_pos(const std::string filename, const std::string col_name, - const CSVFormat format = CSVFormat::guess_csv()); - ///@} -} -/** @file - * A standalone header file for writing delimiter-separated files - */ + current_row.row_length++; + + // Reset field state + field_start = UNINITIALIZED_FIELD; + field_length = 0; + } + + /** @return The number of characters parsed that belong to complete rows */ + CSV_INLINE size_t IBasicCSVParser::parse() + { + using internals::ParseFlags; + + this->quote_escape = false; + this->data_pos = 0; + this->current_row_start() = 0; + this->trim_utf8_bom(); + + auto& in = this->data_ptr->data; + while (this->data_pos < in.size()) { + switch (compound_parse_flag(in[this->data_pos])) { + case ParseFlags::DELIMITER: + this->push_field(); + this->data_pos++; + break; + + case ParseFlags::NEWLINE: + this->data_pos++; + + // Catches CRLF (or LFLF, CRCRLF, or any other non-sensical combination of newlines) + while (this->data_pos < in.size() && parse_flag(in[this->data_pos]) == ParseFlags::NEWLINE) + this->data_pos++; + + // End of record -> Write record + this->push_field(); + this->push_row(); -#include -#include -#include -#include + // Reset + this->current_row = CSVRow(data_ptr, this->data_pos, fields->size()); + break; -namespace csv { - /** @name CSV Writing */ - ///@{ - #ifndef DOXYGEN_SHOULD_SKIP_THIS - 
template - inline std::string csv_escape(csv::string_view in, const bool quote_minimal = true) { - /** Format a string to be RFC 4180-compliant - * @param[in] in String to be CSV-formatted - * @param[out] quote_minimal Only quote fields if necessary. - * If False, everything is quoted. - */ + case ParseFlags::NOT_SPECIAL: + this->parse_field(); + break; + + case ParseFlags::QUOTE_ESCAPE_QUOTE: + if (data_pos + 1 == in.size()) return this->current_row_start(); + else if (data_pos + 1 < in.size()) { + auto next_ch = parse_flag(in[data_pos + 1]); + if (next_ch >= ParseFlags::DELIMITER) { + quote_escape = false; + data_pos++; + break; + } + else if (next_ch == ParseFlags::QUOTE) { + // Case: Escaped quote + data_pos += 2; + this->field_length += 2; + this->field_has_double_quote = true; + break; + } + } + + // Case: Unescaped single quote => not strictly valid but we'll keep it + this->field_length++; + data_pos++; - // Sequence used for escaping quote characters that appear in text - constexpr char double_quote[3] = { Quote, Quote }; + break; - std::string new_string; - bool quote_escape = false; // Do we need a quote escape - new_string += Quote; // Start initial quote escape sequence + default: // Quote (currently not quote escaped) + if (this->field_length == 0) { + quote_escape = true; + data_pos++; + if (field_start == UNINITIALIZED_FIELD && data_pos < in.size() && !ws_flag(in[data_pos])) + field_start = (int)(data_pos - current_row_start()); + break; + } - for (size_t i = 0; i < in.size(); i++) { - switch (in[i]) { - case Quote: - new_string += double_quote; - quote_escape = true; - break; - case Delim: - quote_escape = true; - HEDLEY_FALL_THROUGH; - default: - new_string += in[i]; + // Case: Unescaped quote + this->field_length++; + data_pos++; + + break; + } } + + return this->current_row_start(); } - if (quote_escape || !quote_minimal) { - new_string += Quote; // Finish off quote escape - return new_string; + CSV_INLINE void IBasicCSVParser::push_row() { + 
current_row.row_length = fields->size() - current_row.fields_start; + this->_records->push_back(std::move(current_row)); } - return std::string(in); - } - #endif + CSV_INLINE void IBasicCSVParser::reset_data_ptr() { + this->data_ptr = std::make_shared(); + this->data_ptr->parse_flags = this->_parse_flags; + this->data_ptr->col_names = this->_col_names; + this->fields = &(this->data_ptr->fields); + } - /** - * Class for writing delimiter separated values files - * - * To write formatted strings, one should - * -# Initialize a DelimWriter with respect to some output stream - * -# Call write_row() on std::vectors of unformatted text - * - * @tparam OutputStream The output stream, e.g. `std::ofstream`, `std::stringstream` - * @tparam Delim The delimiter character - * @tparam Quote The quote character - * - * @par Hint - * Use the aliases csv::CSVWriter to write CSV - * formatted strings and csv::TSVWriter - * to write tab separated strings - * - * @par Example - * @snippet test_write_csv.cpp CSV Writer Example - */ - template - class DelimWriter { - public: - /** Construct a DelimWriter over the specified output stream */ - DelimWriter(OutputStream& _out) : out(_out) {}; + CSV_INLINE void IBasicCSVParser::trim_utf8_bom() { + auto& data = this->data_ptr->data; - /** Construct a DelimWriter over the file - * - * @param[out] filename File to write to - */ - DelimWriter(const std::string& filename) : DelimWriter(std::ifstream(filename)) {}; + if (!this->unicode_bom_scan && data.size() >= 3) { + if (data[0] == '\xEF' && data[1] == '\xBB' && data[2] == '\xBF') { + this->data_pos += 3; // Remove BOM from input string + this->_utf8_bom = true; + } - /** Format a sequence of strings and write to CSV according to RFC 4180 - * - * @warning This does not check to make sure row lengths are consistent - * - * @param[in] record Sequence of strings to be formatted - * @param quote_minimal Only quote fields if necessary - */ - template class Container> - void write_row(const Container& 
record, bool quote_minimal = true) { - const size_t ilen = record.size(); - size_t i = 0; - for (auto& field: record) { - out << csv_escape(field, quote_minimal); - if (i + 1 != ilen) out << Delim; - i++; + this->unicode_bom_scan = true; } - - out << std::endl; } +#ifdef _MSC_VER +#pragma endregion +#endif - /** @copydoc write_row - * @return The current DelimWriter instance (allowing for operator chaining) - */ - template class Container> - DelimWriter& operator<<(const Container& record) { - this->write_row(record); - return *this; - } +#ifdef _MSC_VER +#pragma region Specializations +#endif + CSV_INLINE void MmapParser::next(size_t bytes = ITERATION_CHUNK_SIZE) { + // Reset parser state + this->field_start = UNINITIALIZED_FIELD; + this->field_length = 0; + this->reset_data_ptr(); - private: - OutputStream & out; - }; + // Create memory map + size_t length = std::min(this->source_size - this->mmap_pos, bytes); + std::error_code error; + this->data_ptr->_data = std::make_shared>(mio::make_mmap_source(this->_filename, this->mmap_pos, length, error)); + this->mmap_pos += length; + if (error) throw error; - /* Uncomment when C++17 support is better - template - DelimWriter(OutputStream&) -> DelimWriter; - */ + auto mmap_ptr = (mio::basic_mmap_source*)(this->data_ptr->_data.get()); - /** Class for writing CSV files - * - * @sa csv::DelimWriter::write_row() - * @sa csv::DelimWriter::operator<<() - * - * @note Use `csv::make_csv_writer()` to in instatiate this class over - * an actual output stream. - */ - template - using CSVWriter = DelimWriter; + // Create string view + this->data_ptr->data = csv::string_view(mmap_ptr->data(), mmap_ptr->length()); - /** Class for writing tab-separated values files -* - * @sa csv::DelimWriter::write_row() - * @sa csv::DelimWriter::operator<<() - * - * @note Use `csv::make_tsv_writer()` to in instatiate this class over - * an actual output stream. 
- */ - template - using TSVWriter = DelimWriter; + // Parse + this->current_row = CSVRow(this->data_ptr); + size_t remainder = this->parse(); - // - // Temporary: Until more C++17 compilers support template deduction guides - // - template - inline CSVWriter make_csv_writer(OutputStream& out) { - /** Return a CSVWriter over the output stream */ - return CSVWriter(out); - } + if (this->mmap_pos == this->source_size || no_chunk()) { + this->_eof = true; + this->end_feed(); + } - template - inline TSVWriter make_tsv_writer(OutputStream& out) { - /** Return a TSVWriter over the output stream */ - return TSVWriter(out); + this->mmap_pos -= (length - remainder); + } +#ifdef _MSC_VER +#pragma endregion +#endif } - - ///@} } @@ -6366,7 +7229,7 @@ namespace csv { return CSV_NOT_FOUND; } - CSV_INLINE size_t ColNames::size() const { + CSV_INLINE size_t ColNames::size() const noexcept { return this->col_names.size(); } @@ -6413,6 +7276,8 @@ namespace csv { } CSV_INLINE CSVFormat& CSVFormat::header_row(int row) { + if (row < 0) this->variable_column_policy = VariableColumnPolicy::KEEP; + this->header = row; this->col_names = {}; return *this; @@ -6465,10 +7330,6 @@ namespace csv { * @brief Defines functionality needed for basic CSV parsing */ -#include -#include // For read_csv() -#include -#include namespace csv { namespace internals { @@ -6478,8 +7339,9 @@ namespace csv { for (size_t i = 0; i < row.size(); i++) { ret << row[i]; if (i + 1 < row.size()) ret << delim; - else ret << std::endl; + else ret << '\n'; } + ret.flush(); return ret.str(); } @@ -6491,24 +7353,94 @@ namespace csv { * */ CSV_INLINE std::vector _get_col_names(csv::string_view head, CSVFormat format) { - auto parse_flags = internals::make_parse_flags(format.get_delim()); - if (format.is_quoting_enabled()) { - parse_flags = internals::make_parse_flags(format.get_delim(), format.get_quote_char()); - } - // Parse the CSV auto trim_chars = format.get_trim_chars(); + std::stringstream source(head.data()); + 
RowCollection rows; - BasicCSVParser parser( - parse_flags, - internals::make_ws_flags(trim_chars.data(), trim_chars.size()) - ); - - std::deque rows; - parser.parse(head, rows); + StreamParser parser(source, format); + parser.set_output(rows); + parser.next(); return CSVRow(std::move(rows[format.get_header()])); } + + CSV_INLINE GuessScore calculate_score(csv::string_view head, const CSVFormat& format) { + // Frequency counter of row length + std::unordered_map row_tally = { { 0, 0 } }; + + // Map row lengths to row num where they first occurred + std::unordered_map row_when = { { 0, 0 } }; + + // Parse the CSV + std::stringstream source(head.data()); + RowCollection rows; + + StreamParser parser(source, format); + parser.set_output(rows); + parser.next(); + + for (size_t i = 0; i < rows.size(); i++) { + auto& row = rows[i]; + + // Ignore zero-length rows + if (row.size() > 0) { + if (row_tally.find(row.size()) != row_tally.end()) { + row_tally[row.size()]++; + } + else { + row_tally[row.size()] = 1; + row_when[row.size()] = i; + } + } + } + + double final_score = 0; + size_t header_row = 0; + + // Final score is equal to the largest + // row size times rows of that size + for (auto& pair : row_tally) { + auto row_size = pair.first; + auto row_count = pair.second; + double score = (double)(row_size * row_count); + if (score > final_score) { + final_score = score; + header_row = row_when[row_size]; + } + } + + return { + final_score, + header_row + }; + } + + /** Guess the delimiter used by a delimiter-separated values file */ + CSV_INLINE CSVGuessResult _guess_format(csv::string_view head, const std::vector& delims) { + /** For each delimiter, find out which row length was most common. + * The delimiter with the longest mode row length wins. + * Then, the line number of the header row is the first row with + * the mode row length. 
+ */ + + CSVFormat format; + size_t max_score = 0, + header = 0; + char current_delim = delims[0]; + + for (char cand_delim : delims) { + auto result = calculate_score(head, format.delimiter(cand_delim)); + + if ((size_t)result.score > max_score) { + max_score = (size_t)result.score; + current_delim = cand_delim; + header = result.header; + } + } + + return { current_delim, (int)header }; + } } /** Return a CSV's column names @@ -6535,23 +7467,10 @@ namespace csv { return internals::_guess_format(head, delims); } - /** Allows parsing in-memory sources (by calling feed() and end_feed()). */ - CSV_INLINE CSVReader::CSVReader(CSVFormat format) : - unicode_bom_scan(!format.unicode_detect), feed_state(new ThreadedReadingState) { - if (!format.col_names.empty()) { - this->set_col_names(format.col_names); - } - - this->set_parse_flags(format); - } - - /** Allows reading a CSV file in chunks, using overlapped - * threads for simulatenously reading from disk and parsing. - * Rows should be retrieved with read_row() or by using - * CSVReader::iterator. + /** Reads an arbitrarily large CSV file using memory-mapped IO. * - * **Details:** Reads the first 500kB of a CSV file to infer file information - * such as column names and delimiting character. + * **Details:** Reads the first block of a CSV file synchronously to get information + * such as column names and delimiting character. 
* * @param[in] filename Path to CSV file * @param[in] format Format of the CSV file @@ -6559,25 +7478,23 @@ namespace csv { * \snippet tests/test_read_csv.cpp CSVField Example * */ - CSV_INLINE CSVReader::CSVReader(csv::string_view filename, CSVFormat format) : feed_state(new ThreadedReadingState) { + CSV_INLINE CSVReader::CSVReader(csv::string_view filename, CSVFormat format) : _format(format) { auto head = internals::get_csv_head(filename); + using Parser = internals::MmapParser; /** Guess delimiter and header row */ if (format.guess_delim()) { auto guess_result = internals::_guess_format(head, format.possible_delimiters); format.delimiter(guess_result.delim); format.header = guess_result.header_row; + this->_format = format; } - if (format.col_names.empty()) { - this->set_col_names(internals::_get_col_names(head, format)); - } - else { + if (!format.col_names.empty()) this->set_col_names(format.col_names); - } - this->set_parse_flags(format); - this->fopen(filename); + this->parser = std::unique_ptr(new Parser(filename, format, this->col_names)); // For C++11 + this->initial_read(); } /** Return the format of the original raw CSV */ @@ -6613,116 +7530,21 @@ namespace csv { return CSV_NOT_FOUND; } - CSV_INLINE void CSVReader::feed(internals::WorkItem&& buff) { - this->feed( csv::string_view(buff.first.get(), buff.second) ); - } - - /** Parse a CSV-formatted string. - * - * @par Usage - * Incomplete CSV fragments can be joined together by calling feed() on them sequentially. - * - * @note - * `end_feed()` should be called after the last string. 
- */ - CSV_INLINE void CSVReader::feed(csv::string_view in) { - if (in.empty()) return; - - /** Handle possible Unicode byte order mark */ - if (!this->unicode_bom_scan) { - if (in[0] == '\xEF' && in[1] == '\xBB' && in[2] == '\xBF') { - in.remove_prefix(3); // Remove BOM from input string - this->_utf8_bom = true; - } - - this->unicode_bom_scan = true; - } - - this->parser.parse(in, this->records); - + CSV_INLINE void CSVReader::trim_header() { if (!this->header_trimmed) { - for (int i = 0; i <= this->_format.header && !this->records.empty(); i++) { + for (int i = 0; i <= this->_format.header && !this->records->empty(); i++) { if (i == this->_format.header && this->col_names->empty()) { - this->set_col_names(CSVRow(std::move(this->records.front()))); + this->set_col_names(this->records->pop_front()); + } + else { + this->records->pop_front(); } - - this->records.pop_front(); } this->header_trimmed = true; } } - CSV_INLINE void CSVReader::end_feed() { - /** Indicate that there is no more data to receive, - * and handle the last row - */ - this->parser.end_feed(this->records); - } - - /** Worker thread for read_csv() which parses CSV rows (while the main - * thread pulls data from disk) - */ - CSV_INLINE void CSVReader::read_csv_worker() { - while (true) { - std::unique_lock lock{ this->feed_state->feed_lock }; // Get lock - this->feed_state->feed_cond.wait(lock, // Wait - [this] { return !(this->feed_state->feed_buffer.empty()); }); - - // Wake-up - auto in = std::move(this->feed_state->feed_buffer.front()); - this->feed_state->feed_buffer.pop_front(); - - // Nullptr --> Die - if (!in.first) break; - - lock.unlock(); // Release lock - this->feed(std::move(in)); - } - } - - CSV_INLINE void CSVReader::set_parse_flags(const CSVFormat& format) - { - this->_format = format; - if (format.no_quote) { - this->parser.set_parse_flags(internals::make_parse_flags(format.get_delim())); - } - else { - this->parser.set_parse_flags(internals::make_parse_flags(format.get_delim(), 
format.quote_char)); - } - - this->parser.set_ws_flags(internals::make_ws_flags(format.trim_chars.data(), format.trim_chars.size())); - } - - CSV_INLINE void CSVReader::fopen(csv::string_view filename) { - this->_filename = filename; - - if (!this->csv_mmap.is_open()) { - this->csv_mmap_eof = false; - std::ifstream infile(_filename, std::ios::binary); - const auto start = infile.tellg(); - infile.seekg(0, std::ios::end); - const auto end = infile.tellg(); - this->file_size = end - start; - - std::error_code error; - - if (internals::get_available_memory() > this->file_size * 2) { - this->csv_mmap.map(filename, error); - } - else { - this->csv_mmap.map(filename, 0, - std::min((size_t)csv::internals::ITERATION_CHUNK_SIZE, this->file_size), - error - ); - } - - if (error) { - throw error; - } - } - } - /** * @param[in] names Column names */ @@ -6733,83 +7555,38 @@ namespace csv { } /** - * Parse a CSV file using multiple threads - * - * @pre CSVReader::infile points to a valid file handle, i.e. CSVReader::fopen was called + * Read a chunk of CSV data. * - * @param[in] bytes Number of bytes to read. 
- * @see CSVReader::read_row() - */ - CSV_INLINE void CSVReader::read_csv(const size_t& bytes) { - if (this->_filename.empty()) { - return; - } - - const size_t BUFFER_UPPER_LIMIT = std::min(bytes, (size_t)1000000); - std::unique_ptr buffer(new char[BUFFER_UPPER_LIMIT]); - auto * HEDLEY_RESTRICT line_buffer = buffer.get(); - line_buffer[0] = '\0'; - - std::thread worker(&CSVReader::read_csv_worker, this); - - size_t strlen = 0; - for (size_t processed = 0; this->csv_mmap_pos < this->file_size && processed < bytes; this->csv_mmap_pos++) { - if (this->relative_mmap_pos == this->csv_mmap.length()) { - std::error_code error; - - size_t length = std::min(this->file_size - this->csv_mmap_pos, csv::internals::ITERATION_CHUNK_SIZE); - this->csv_mmap = mio::make_mmap_source(this->_filename, this->csv_mmap_pos, - length, - error - ); - - if (error) { - throw error; - } - - this->relative_mmap_pos = 0; - } - - line_buffer[strlen] = this->csv_mmap[this->relative_mmap_pos]; - strlen++; - this->relative_mmap_pos++; - - if (strlen == BUFFER_UPPER_LIMIT - 1) { - processed += strlen; - line_buffer[strlen] = '\0'; - - std::unique_lock lock{ this->feed_state->feed_lock }; - - this->feed_state->feed_buffer.push_back(std::make_pair<>(std::move(buffer), strlen)); + * @note This method is meant to be run on its own thread. Only one `read_csv()` thread + * should be active at a time. + * + * @param[in] bytes Number of bytes to read. 
+ * + * @see CSVReader::read_csv_worker + * @see CSVReader::read_row() + */ + CSV_INLINE bool CSVReader::read_csv(size_t bytes) { + // Tell read_row() to listen for CSV rows + this->records->notify_all(); - buffer = std::unique_ptr(new char[BUFFER_UPPER_LIMIT]); // New pointer - line_buffer = buffer.get(); - line_buffer[0] = '\0'; - strlen = 0; + this->parser->set_output(*this->records); + this->parser->next(bytes); - this->feed_state->feed_cond.notify_one(); - } + if (!this->header_trimmed) { + this->trim_header(); } - // Feed remaining bits - std::unique_lock lock{ this->feed_state->feed_lock }; - this->feed_state->feed_buffer.push_back(std::make_pair<>(std::move(buffer), strlen)); - this->feed_state->feed_buffer.push_back(std::make_pair<>(nullptr, 0)); // Termination signal - this->feed_state->feed_cond.notify_one(); - lock.unlock(); - worker.join(); - - if (this->csv_mmap_pos == this->csv_mmap.length()) { - this->csv_mmap_eof = true; - this->end_feed(); - } + // Tell read_row() to stop waiting + this->records->kill_all(); + + return true; } /** * Retrieve rows as CSVRow objects, returning true if more rows are available. * - * **Performance Notes**: - * - The number of rows read in at a time is determined by csv::ITERATION_CHUNK_SIZE + * @par Performance Notes + * - Reads chunks of data that are csv::internals::ITERATION_CHUNK_SIZE bytes large at a time * - For performance details, read the documentation for CSVRow and CSVField. 
* * @param[out] row The variable where the parsed row will be stored @@ -6820,33 +7597,36 @@ namespace csv { * */ CSV_INLINE bool CSVReader::read_row(CSVRow &row) { - if (this->records.empty()) { - if (!this->eof()) { - this->read_csv(internals::ITERATION_CHUNK_SIZE); - } - else return false; // Stop reading - } + while (true) { + if (this->records->empty()) { + if (this->records->is_waitable()) + // Reading thread is currently active => wait for it to populate records + this->records->wait(); + else if (this->parser->eof()) + // End of file and no more records + return false; + else { + // Reading thread is not active => start another one + if (this->read_csv_worker.joinable()) + this->read_csv_worker.join(); - while (!this->records.empty()) { - if (this->records.front().size() != this->n_cols && + this->read_csv_worker = std::thread(&CSVReader::read_csv, this, internals::ITERATION_CHUNK_SIZE); + } + } + else if (this->records->front().size() != this->n_cols && this->_format.variable_column_policy != VariableColumnPolicy::KEEP) { + auto errored_row = this->records->pop_front(); + if (this->_format.variable_column_policy == VariableColumnPolicy::THROW) { - auto errored_row = std::move(this->records.front()); - if (this->records.front().size() < this->n_cols) { + if (errored_row.size() < this->n_cols) throw std::runtime_error("Line too short " + internals::format_row(errored_row)); - } throw std::runtime_error("Line too long " + internals::format_row(errored_row)); } - - // Silently drop row (default) - this->records.pop_front(); } else { - row = std::move(this->records.front()); - - this->num_rows++; - this->records.pop_front(); + row = this->records->pop_front(); + this->_n_rows++; return true; } } @@ -6855,105 +7635,6 @@ namespace csv { } } -#include - -namespace csv { - namespace internals { - CSV_INLINE GuessScore calculate_score(csv::string_view head, CSVFormat format) { - // Frequency counter of row length - std::unordered_map row_tally = { { 0, 0 } }; - - // 
Map row lengths to row num where they first occurred - std::unordered_map row_when = { { 0, 0 } }; - - // Parse the CSV - BasicCSVParser parser( - internals::make_parse_flags(format.get_delim(), '"'), - internals::make_ws_flags({}, 0) - ); - - std::deque rows; - parser.parse(head, rows); - - for (size_t i = 0; i < rows.size(); i++) { - auto& row = rows[i]; - - // Ignore zero-length rows - if (row.size() > 0) { - if (row_tally.find(row.size()) != row_tally.end()) { - row_tally[row.size()]++; - } - else { - row_tally[row.size()] = 1; - row_when[row.size()] = i; - } - } - } - - double final_score = 0; - size_t header_row = 0; - - // Final score is equal to the largest - // row size times rows of that size - for (auto& [row_size, row_count] : row_tally) { - double score = (double)(row_size * row_count); - if (score > final_score) { - final_score = score; - header_row = row_when[row_size]; - } - } - - return { - final_score, - header_row - }; - } - - /** Guess the delimiter used by a delimiter-separated values file */ - CSV_INLINE CSVGuessResult _guess_format(csv::string_view head, const std::vector& delims) { - /** For each delimiter, find out which row length was most common. - * The delimiter with the longest mode row length wins. - * Then, the line number of the header row is the first row with - * the mode row length. 
- */ - - CSVFormat format; - size_t max_score = 0, - header = 0; - char current_delim = delims[0]; - - for (char cand_delim : delims) { - auto result = calculate_score(head, format.delimiter(cand_delim)); - - if (result.score > max_score) { - max_score = (size_t)result.score; - current_delim = cand_delim; - header = result.header; - } - } - - return { current_delim, (int)header }; - } - - CSV_INLINE std::string get_csv_head(csv::string_view filename) { - const size_t bytes = 500000; - std::ifstream infile(filename.data()); - if (!infile.is_open()) { - throw std::runtime_error("Cannot open file " + std::string(filename)); - } - - std::unique_ptr buffer(new char[bytes + 1]); - char * head_buffer = buffer.get(); - - for (size_t i = 0; i < bytes + 1; i++) { - head_buffer[i] = '\0'; - } - - infile.read(head_buffer, bytes); - return std::string(head_buffer); - } - } -} /** @file * Defines an input iterator for csv::CSVReader */ @@ -6962,25 +7643,23 @@ namespace csv { namespace csv { /** Return an iterator to the first row in the reader */ CSV_INLINE CSVReader::iterator CSVReader::begin() { - if (this->records.empty()) { - this->read_csv(); + if (this->records->empty()) { + this->read_csv_worker = std::thread(&CSVReader::read_csv, this, internals::ITERATION_CHUNK_SIZE); + this->read_csv_worker.join(); // Still empty => return end iterator - if (this->records.empty()) { - return this->end(); - } + if (this->records->empty()) return this->end(); } - CSVReader::iterator ret(this, std::move(this->records.front())); - - this->records.pop_front(); + this->_n_rows++; + CSVReader::iterator ret(this, this->records->pop_front()); return ret; } /** A placeholder for the imaginary past the end row in a CSV. * Attempting to deference this will lead to bad things. 
*/ - CSV_INLINE HEDLEY_CONST CSVReader::iterator CSVReader::end() const { + CSV_INLINE HEDLEY_CONST CSVReader::iterator CSVReader::end() const noexcept { return CSVReader::iterator(); } @@ -6995,7 +7674,10 @@ namespace csv { /** Advance the iterator by one row. If this CSVReader has an * associated file, then the iterator will lazily pull more data from - * that file until EOF. + * that file until the end of file is reached. + * + * @note This iterator does **not** block the thread responsible for parsing CSV. + * */ CSV_INLINE CSVReader::iterator& CSVReader::iterator::operator++() { if (!daddy->read_row(this->row)) { @@ -7015,6 +7697,7 @@ namespace csv { return temp; } } + /** @file * Defines the data type used for storing information about a CSV row */ @@ -7024,10 +7707,17 @@ namespace csv { namespace csv { namespace internals { - CSV_INLINE void CSVFieldArray::allocate() { - RawCSVField * buffer = new RawCSVField[single_buffer_capacity]; - buffers.push_back(buffer); + CSV_INLINE RawCSVField& CSVFieldList::operator[](size_t n) const { + const size_t page_no = n / _single_buffer_capacity; + const size_t buffer_idx = (page_no < 1) ? 
n : n % _single_buffer_capacity; + return this->buffers[page_no][buffer_idx]; + } + + CSV_INLINE void CSVFieldList::allocate() { + buffers.push_back(std::unique_ptr(new RawCSVField[_single_buffer_capacity])); + _current_buffer_size = 0; + _back = buffers.back().get(); } } @@ -7064,7 +7754,6 @@ namespace csv { } CSV_INLINE CSVRow::operator std::vector() const { - std::vector ret; for (size_t i = 0; i < size(); i++) ret.push_back(std::string(this->get_field(i))); @@ -7074,23 +7763,21 @@ namespace csv { CSV_INLINE csv::string_view CSVRow::get_field(size_t index) const { - if (index >= this->size()) { - throw std::runtime_error("Index out of bounds."); - } + using internals::ParseFlags; - size_t field_index = this->field_bounds_index + index; - const RawCSVField& raw_field = this->data->fields[field_index]; - bool has_doubled_quote = this->data->has_double_quotes.find(field_index) != this->data->has_double_quotes.end(); + if (index >= this->size()) + throw std::runtime_error("Index out of bounds."); - csv::string_view csv_field = csv::string_view(this->data->data).substr(this->data_start + raw_field.start); + const size_t field_index = this->fields_start + index; + auto& field = this->data->fields[field_index]; + auto field_str = csv::string_view(this->data->data).substr(this->data_start + field.start); - if (has_doubled_quote) { - std::string& ret = this->data->double_quote_fields[field_index]; - if (ret.empty()) { + if (field.has_double_quote) { + auto& value = this->data->double_quote_fields[field_index]; + if (value.empty()) { bool prev_ch_quote = false; - for (size_t i = 0; i < raw_field.length; i++) { - // TODO: Use parse flags - if (csv_field[i] == '"') { + for (size_t i = 0; i < field.length; i++) { + if (this->data->parse_flags[field_str[i] + 128] == ParseFlags::QUOTE) { if (prev_ch_quote) { prev_ch_quote = false; continue; @@ -7100,14 +7787,101 @@ namespace csv { } } - ret += csv_field[i]; + value += field_str[i]; } } - return csv::string_view(ret); + return 
csv::string_view(value); + } + + return field_str.substr(0, field.length); + } + + CSV_INLINE bool CSVField::try_parse_hex(int& parsedValue) { + size_t start = 0, end = 0; + + // Trim out whitespace chars + for (; start < this->sv.size() && this->sv[start] == ' '; start++); + for (end = start; end < this->sv.size() && this->sv[end] != ' '; end++); + + int value_ = 0; + + size_t digits = (end - start); + size_t base16_exponent = digits - 1; + + if (digits == 0) return false; + + for (const auto& ch : this->sv.substr(start, digits)) { + int digit = 0; + + switch (ch) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + digit = static_cast(ch - '0'); + break; + case 'a': + case 'A': + digit = 10; + break; + case 'b': + case 'B': + digit = 11; + break; + case 'c': + case 'C': + digit = 12; + break; + case 'd': + case 'D': + digit = 13; + break; + case 'e': + case 'E': + digit = 14; + break; + case 'f': + case 'F': + digit = 15; + break; + default: + return false; + } + + value_ += digit * (int)pow(16, (double)base16_exponent); + base16_exponent--; + } + + parsedValue = value_; + return true; + } + + CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) { + // If field has already been parsed to empty, no need to do it aagin: + if (this->_type == DataType::CSV_NULL) + return false; + + // Not yet parsed or possibly parsed with other decimalSymbol + if (this->_type == DataType::UNKNOWN || this->_type == DataType::CSV_STRING || this->_type == DataType::CSV_DOUBLE) + this->_type = internals::data_type(this->sv, &this->value, decimalSymbol); // parse again + + // Integral types are not affected by decimalSymbol and need not be parsed again + + // Either we already had an integral type before, or we we just got any numeric type now. 
+ if (this->_type >= DataType::CSV_INT8 && this->_type <= DataType::CSV_DOUBLE) { + dVal = this->value; + return true; } - return csv_field.substr(0, raw_field.length); + // CSV_NULL or CSV_STRING, not numeric + return false; } #ifdef _MSC_VER @@ -7123,11 +7897,11 @@ namespace csv { * @warning Attempting to dereference the end iterator results * in dereferencing a null pointer. */ - CSV_INLINE CSVRow::iterator CSVRow::end() const { + CSV_INLINE CSVRow::iterator CSVRow::end() const noexcept { return CSVRow::iterator(this, (int)this->size()); } - CSV_INLINE CSVRow::reverse_iterator CSVRow::rbegin() const { + CSV_INLINE CSVRow::reverse_iterator CSVRow::rbegin() const noexcept { return std::reverse_iterator(this->end()); } @@ -7150,12 +7924,7 @@ namespace csv { } CSV_INLINE CSVRow::iterator::pointer CSVRow::iterator::operator->() const { - // Using CSVField * as pointer type causes segfaults in MSVC debug builds - #ifdef _MSC_BUILD return this->field; - #else - return this->field.get(); - #endif } CSV_INLINE CSVRow::iterator& CSVRow::iterator::operator++() { @@ -7190,7 +7959,7 @@ namespace csv { this->operator--(); return temp; } - + CSV_INLINE CSVRow::iterator CSVRow::iterator::operator+(difference_type n) const { // Allows for iterator arithmetic return CSVRow::iterator(this->daddy, i + (int)n); @@ -7219,9 +7988,9 @@ namespace csv { The code is licensed under the [MIT License](http://opensource.org/licenses/MIT): - + Copyright © 2013-2015 Niels Lohmann. 
- + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, @@ -7229,10 +7998,10 @@ namespace csv { publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -7300,7 +8069,8 @@ namespace csv { } // create a result string of necessary size - std::string result(s.size() + space, '\\'); + size_t result_size = s.size() + space; + std::string result(result_size, '\\'); std::size_t pos = 0; for (const auto& c : s) @@ -7375,7 +8145,7 @@ namespace csv { if (c >= 0x00 && c <= 0x1f) { // print character c as \uxxxx - sprintf(&result[pos + 1], "u%04x", int(c)); + snprintf(&result[pos + 1], result_size - pos - 1, "u%04x", int(c)); pos += 6; // overwrite trailing null character result[pos] = '\\'; @@ -7409,7 +8179,7 @@ namespace csv { const size_t _n_cols = col_names.size(); std::string ret = "{"; - + for (size_t i = 0; i < _n_cols; i++) { auto& col = col_names[i]; auto field = this->operator[](col); @@ -7465,6 +8235,7 @@ namespace csv { return ret; } } + /** @file * Calculates statistics from CSV files */ @@ -7472,30 +8243,25 @@ namespace csv { #include namespace csv { + /** Calculate statistics for an arbitrarily large file. When this constructor + * is called, CSVStat will process the entire file iteratively. Once finished, + * methods like get_mean(), get_counts(), etc... can be used to retrieve statistics. 
+ */ CSV_INLINE CSVStat::CSVStat(csv::string_view filename, CSVFormat format) : - CSVReader(filename, format) { - /** Lazily calculate statistics for a potentially large file. Once this constructor - * is called, CSVStat will process the entire file iteratively. Once finished, - * methods like get_mean(), get_counts(), etc... can be used to retrieve statistics. - */ - while (!this->eof()) { - this->read_csv(internals::ITERATION_CHUNK_SIZE); - this->calc(); - } - - if (!this->records.empty()) - this->calc(); + reader(filename, format) { + this->calc(); } - CSV_INLINE void CSVStat::end_feed() { - CSVReader::end_feed(); + /** Calculate statistics for a CSV stored in a std::stringstream */ + CSV_INLINE CSVStat::CSVStat(std::stringstream& stream, CSVFormat format) : + reader(stream, format) { this->calc(); } /** Return current means */ CSV_INLINE std::vector CSVStat::get_mean() const { - std::vector ret; - for (size_t i = 0; i < this->col_names->size(); i++) { + std::vector ret; + for (size_t i = 0; i < this->get_col_names().size(); i++) { ret.push_back(this->rolling_means[i]); } return ret; @@ -7503,8 +8269,8 @@ namespace csv { /** Return current variances */ CSV_INLINE std::vector CSVStat::get_variance() const { - std::vector ret; - for (size_t i = 0; i < this->col_names->size(); i++) { + std::vector ret; + for (size_t i = 0; i < this->get_col_names().size(); i++) { ret.push_back(this->rolling_vars[i]/(this->n[i] - 1)); } return ret; @@ -7512,8 +8278,8 @@ namespace csv { /** Return current mins */ CSV_INLINE std::vector CSVStat::get_mins() const { - std::vector ret; - for (size_t i = 0; i < this->col_names->size(); i++) { + std::vector ret; + for (size_t i = 0; i < this->get_col_names().size(); i++) { ret.push_back(this->mins[i]); } return ret; @@ -7521,8 +8287,8 @@ namespace csv { /** Return current maxes */ CSV_INLINE std::vector CSVStat::get_maxes() const { - std::vector ret; - for (size_t i = 0; i < this->col_names->size(); i++) { + std::vector ret; + for (size_t 
i = 0; i < this->get_col_names().size(); i++) { ret.push_back(this->maxes[i]); } return ret; @@ -7531,7 +8297,7 @@ namespace csv { /** Get counts for each column */ CSV_INLINE std::vector CSVStat::get_counts() const { std::vector ret; - for (size_t i = 0; i < this->col_names->size(); i++) { + for (size_t i = 0; i < this->get_col_names().size(); i++) { ret.push_back(this->counts[i]); } return ret; @@ -7539,48 +8305,67 @@ namespace csv { /** Get data type counts for each column */ CSV_INLINE std::vector CSVStat::get_dtypes() const { - std::vector ret; - for (size_t i = 0; i < this->col_names->size(); i++) { + std::vector ret; + for (size_t i = 0; i < this->get_col_names().size(); i++) { ret.push_back(this->dtypes[i]); } return ret; } - CSV_INLINE void CSVStat::calc() { - /** Go through all records and calculate specified statistics */ - for (size_t i = 0; i < this->col_names->size(); i++) { - dtypes.push_back({}); - counts.push_back({}); - rolling_means.push_back(0); - rolling_vars.push_back(0); - mins.push_back(NAN); - maxes.push_back(NAN); - n.push_back(0); + CSV_INLINE void CSVStat::calc_chunk() { + /** Only create stats counters the first time **/ + if (dtypes.empty()) { + /** Go through all records and calculate specified statistics */ + for (size_t i = 0; i < this->get_col_names().size(); i++) { + dtypes.push_back({}); + counts.push_back({}); + rolling_means.push_back(0); + rolling_vars.push_back(0); + mins.push_back(NAN); + maxes.push_back(NAN); + n.push_back(0); + } } - std::vector pool; - // Start threads - for (size_t i = 0; i < this->col_names->size(); i++) + std::vector pool; + for (size_t i = 0; i < this->get_col_names().size(); i++) pool.push_back(std::thread(&CSVStat::calc_worker, this, i)); // Block until done - for (auto& th: pool) + for (auto& th : pool) th.join(); this->records.clear(); } + CSV_INLINE void CSVStat::calc() { + constexpr size_t CALC_CHUNK_SIZE = 5000; + + for (auto& row : reader) { + this->records.push_back(std::move(row)); + + /** 
Chunk rows */ + if (this->records.size() == CALC_CHUNK_SIZE) { + calc_chunk(); + } + } + + if (!this->records.empty()) { + calc_chunk(); + } + } + CSV_INLINE void CSVStat::calc_worker(const size_t &i) { /** Worker thread for CSVStat::calc() which calculates statistics for one column. - * + * * @param[in] i Column index */ auto current_record = this->records.begin(); for (size_t processed = 0; current_record != this->records.end(); processed++) { - if (current_record->size() == this->n_cols) { + if (current_record->size() == this->get_col_names().size()) { auto current_field = (*current_record)[i]; // Optimization: Don't count() if there's too many distinct values in the first 1000 rows @@ -7598,7 +8383,7 @@ namespace csv { this->min_max(x_n, i); } } - else if (this->_format.get_variable_column_policy() == VariableColumnPolicy::THROW) { + else if (this->reader.get_format().get_variable_column_policy() == VariableColumnPolicy::THROW) { throw std::runtime_error("Line has different length than the others " + internals::format_row(*current_record)); } @@ -7611,7 +8396,7 @@ namespace csv { * @param[in] record Data observation * @param[out] i The column index that should be updated */ - + auto type = data.type(); if (this->dtypes[i].find(type) != this->dtypes[i].end()) { @@ -7650,7 +8435,7 @@ namespace csv { this->mins[i] = x_n; if (std::isnan(this->maxes[i])) this->maxes[i] = x_n; - + if (x_n < this->mins[i]) this->mins[i] = x_n; else if (x_n > this->maxes[i]) @@ -7670,7 +8455,7 @@ namespace csv { long double delta2; current_n++; - + if (current_n == 1) { current_rolling_mean = x_n; } else { @@ -7717,20 +8502,32 @@ namespace csv { return csv_dtypes; } } +#include #include namespace csv { - /** Shorthand function for parsing an in-memory CSV string, - * a collection of CSVRow objects + /** Shorthand function for parsing an in-memory CSV string + * + * @return A collection of CSVRow objects * + * @par Example * @snippet tests/test_read_csv.cpp Parse Example */ CSV_INLINE 
CSVReader parse(csv::string_view in, CSVFormat format) { - CSVReader parser(format); - parser.feed(in); - parser.end_feed(); - return parser; + std::stringstream stream(in.data()); + return CSVReader(stream, format); + } + + /** Parses a CSV string with no headers + * + * @return A collection of CSVRow objects + */ + CSV_INLINE CSVReader parse_no_header(csv::string_view in) { + CSVFormat format; + format.header_row(-1); + + return parse(in, format); } /** Parse a RFC 4180 CSV string, returning a collection @@ -7744,6 +8541,11 @@ namespace csv { return parse(csv::string_view(in, n)); } + /** A shorthand for csv::parse_no_header() */ + CSV_INLINE CSVReader operator ""_csv_no_header(const char* in, size_t n) { + return parse_no_header(csv::string_view(in, n)); + } + /** * Find the position of a column in a CSV file or CSV_NOT_FOUND otherwise * @@ -7752,9 +8554,9 @@ namespace csv { * @param[in] format Format of the CSV file */ CSV_INLINE int get_col_pos( - const std::string filename, - const std::string col_name, - const CSVFormat format) { + csv::string_view filename, + csv::string_view col_name, + const CSVFormat& format) { CSVReader reader(filename, format); return reader.index_of(col_name); } @@ -7771,172 +8573,13 @@ namespace csv { filename, reader.get_col_names(), format.get_delim(), - reader.size(), - (int)reader.get_col_names().size() + reader.n_rows(), + reader.get_col_names().size() }; return info; } } -namespace csv { - CSV_INLINE void BasicCSVParser::parse(csv::string_view in, std::deque& records) { - using internals::ParseFlags; - - this->set_data_ptr(std::make_shared()); - this->data_ptr->col_names = this->col_names; - this->_records = &records; - - // Check for previous fragments - if ((this->current_row.data && this->current_row.size() > 0) || this->field_length > 0) { - // Make a separate data buffer for the fragment row - auto temp_str = this->current_row.data->data.substr(this->current_row.data_start); - - this->current_row.data = this->data_ptr; - 
this->current_row.data_start = 0; - this->current_row.row_length = 0; - this->current_row.field_bounds_index = 0; - - this->field_start = -1; - this->field_length = 0; - - auto& fragment_data = this->current_row.data; - fragment_data->data.reserve(temp_str.size() + in.size()); - fragment_data->data = temp_str; - fragment_data->data += in; - - in = csv::string_view(fragment_data->data); - } - else { - this->data_ptr->data.assign(in.data(), in.size()); - this->current_row = CSVRow(this->data_ptr); - } - - this->parse_loop(in); - } - - CSV_INLINE void BasicCSVParser::push_field() - { - // Push field - this->fields->push_back({ - this->field_start > 0 ? (unsigned int)this->field_start : 0, - this->field_length - }); - this->current_row.row_length++; - - if (this->field_has_double_quote) { - this->current_row.data->has_double_quotes.insert(this->data_ptr->fields.size() - 1); - this->field_has_double_quote = false; - } - - // Reset field state - this->field_start = -1; - this->field_length = 0; - } - - CONSTEXPR void BasicCSVParser::parse_field(csv::string_view in, size_t& i, const size_t& current_row_start, bool quote_escape) { - using internals::ParseFlags; - - // Trim off leading whitespace - while (i < in.size() && ws_flag(in[i])) i++; - - if (this->field_start < 0) { - this->field_start = (int)(i - current_row_start); - } - - // Optimization: Since NOT_SPECIAL characters tend to occur in contiguous - // sequences, use the loop below to avoid having to go through the outer - // switch statement as much as possible - if (quote_escape) { - while (i < in.size() && parse_flag(in[i]) != ParseFlags::QUOTE) i++; - } - else { - while (i < in.size() && parse_flag(in[i]) == ParseFlags::NOT_SPECIAL) i++; - } - - this->field_length = i - (this->field_start + current_row_start); - - // Trim off trailing whitespace, this->field_length constraint matters - // when field is entirely whitespace - for (size_t j = i - 1; ws_flag(in[j]) && this->field_length > 0; j--) 
this->field_length--; - } - - CSV_INLINE void BasicCSVParser::parse_loop(csv::string_view in) - { - using internals::ParseFlags; - - // Parser state - size_t current_row_start = 0; - bool quote_escape = false; - - size_t in_size = in.size(); - for (size_t i = 0; i < in_size; ) { - if (quote_escape) { - // TODO: Clean up these conditions - if (parse_flag(in[i]) == ParseFlags::QUOTE) { - if (i + 1 == in.size() || (i + 1 < in.size() && parse_flag(in[i + 1]) >= ParseFlags::DELIMITER)) { - quote_escape = false; - i++; - continue; - } - - // Case: Escaped quote - this->field_length++; - i++; - - if (i < in.size() && parse_flag(in[i]) == ParseFlags::QUOTE) { - i++; - this->field_length++; - this->field_has_double_quote = true; - } - - continue; - } - - this->parse_field(in, i, current_row_start, quote_escape); - } - else { - switch (parse_flag(in[i])) { - case ParseFlags::DELIMITER: - this->push_field(); - i++; - break; - - case ParseFlags::NEWLINE: - i++; - - // Catches CRLF (or LFLF) - if (i < in.size() && parse_flag(in[i]) == ParseFlags::NEWLINE) i++; - - // End of record -> Write record - this->push_field(); - this->push_row(*this->_records); - this->current_row = CSVRow(this->data_ptr); - this->current_row.data_start = i; - this->current_row.field_bounds_index = this->data_ptr->fields.size(); - current_row_start = i; - break; - - case ParseFlags::NOT_SPECIAL: - this->parse_field(in, i, current_row_start, quote_escape); - break; - default: // Quote - if (this->field_length == 0) { - quote_escape = true; - i++; - break; - } - - // Unescaped quote - this->field_length++; - i++; - - break; - } - } - } - } -} - #endif From a2209d4ef5b87b56bb3c19f87ee36a17827eacb9 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 10 Oct 2024 16:57:29 +0800 Subject: [PATCH 030/100] Moved csv.hpp to include folder --- {src/cartogram_info => include}/csv.hpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {src/cartogram_info => 
include}/csv.hpp (100%) diff --git a/src/cartogram_info/csv.hpp b/include/csv.hpp similarity index 100% rename from src/cartogram_info/csv.hpp rename to include/csv.hpp From 551873d73a28f4c6d2078447f6ba2e2017107880 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 10 Oct 2024 16:59:34 +0800 Subject: [PATCH 031/100] Remove MSVC conditions, we don't support MSVC --- CMakeLists.txt | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f6712939..c33a2985 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,9 @@ cmake_minimum_required(VERSION 3.6) -if(UNIX AND NOT APPLE) +# We only support macOS (Darwin) and Linux +set(SUPPORTED_SYSTEMS "Linux" "Darwin") + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") set(CMAKE_CXX_COMPILER "g++-11") set(CMAKE_C_COMPILER "gcc-11") endif() @@ -44,14 +47,12 @@ target_include_directories(cartogram PUBLIC ) # ========== Compile Options ========== +target_compile_options(cartogram PRIVATE -ffp-contract=off) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") target_compile_options(cartogram PRIVATE -isystem ${Boost_INCLUDE_DIRS}) - target_compile_options(cartogram PRIVATE -ffp-contract=off) -elseif(UNIX AND NOT APPLE) +else() + # Likely compiling on ubuntu with G++, like in the CI target_compile_options(cartogram PRIVATE -I ${Boost_INCLUDE_DIRS}) - target_compile_options(cartogram PRIVATE -ffp-contract=off) -elseif(MSVC) - target_compile_options(cartogram PRIVATE /external:I ${Boost_INCLUDE_DIRS}) endif() # Compiler warnings @@ -95,14 +96,11 @@ foreach(TEST_FILE ${TEST_FILES}) PkgConfig::fftw ) + target_compile_options(${TEST_NAME} PRIVATE -ffp-contract=off) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") target_compile_options(${TEST_NAME} PRIVATE -isystem ${Boost_INCLUDE_DIRS}) - target_compile_options(${TEST_NAME} PRIVATE -ffp-contract=off) - elseif(UNIX AND NOT APPLE) + else() target_compile_options(cartogram PRIVATE -I 
${Boost_INCLUDE_DIRS}) - target_compile_options(cartogram PRIVATE -ffp-contract=off) - elseif(MSVC) - target_compile_options(${TEST_NAME} PRIVATE /external:I ${Boost_INCLUDE_DIRS}) endif() # Compiler warnings for the test executable From 7a1bfbc1241df09a9e50824abe9f7c977655c754 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 10 Oct 2024 17:00:23 +0800 Subject: [PATCH 032/100] Remove Matplot++ dependency --- CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c33a2985..a7bde54d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,9 +26,6 @@ endif() # Boost find_package(Boost REQUIRED COMPONENTS unit_test_framework) -# Matplot++ -find_package(Matplot++) - # PkgConfig, fftw, and cairo find_package(PkgConfig REQUIRED) pkg_search_module(fftw REQUIRED fftw3 IMPORTED_TARGET) @@ -44,6 +41,7 @@ target_include_directories(cartogram PUBLIC ${Boost_INCLUDE_DIRS} PkgConfig::fftw PkgConfig::cairo + $<$:/opt/homebrew/include> ) # ========== Compile Options ========== @@ -62,7 +60,6 @@ target_compile_options(cartogram PRIVATE -Wall -Wextra -pedantic -Wno-deprecated target_link_libraries(cartogram PkgConfig::fftw PkgConfig::cairo - $<$:Matplot++::matplot> ) # ========== Installation ========== @@ -94,6 +91,7 @@ foreach(TEST_FILE ${TEST_FILES}) ${PROJECT_SOURCE_DIR}/include ${Boost_INCLUDE_DIRS} PkgConfig::fftw + $<$:/opt/homebrew/include> ) target_compile_options(${TEST_NAME} PRIVATE -ffp-contract=off) From d925bbcb0eaf15b4ffd4955123e4c77aac4ed477 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 10 Oct 2024 17:01:28 +0800 Subject: [PATCH 033/100] Add SYSTEM property to external libraries --- CMakeLists.txt | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a7bde54d..5f572e4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ 
-36,11 +36,13 @@ file(GLOB_RECURSE CARTOGRAM_SOURCES "src/*.cpp") add_executable(cartogram ${CARTOGRAM_SOURCES}) # ========== Include Directories ========== -target_include_directories(cartogram PUBLIC - ${PROJECT_SOURCE_DIR}/include - ${Boost_INCLUDE_DIRS} - PkgConfig::fftw - PkgConfig::cairo +target_include_directories(cartogram + PRIVATE + ${PROJECT_SOURCE_DIR}/include + SYSTEM + ${Boost_INCLUDE_DIRS} + PkgConfig::fftw + PkgConfig::cairo $<$:/opt/homebrew/include> ) @@ -54,12 +56,16 @@ else() endif() # Compiler warnings -target_compile_options(cartogram PRIVATE -Wall -Wextra -pedantic -Wno-deprecated-declarations) +target_compile_options(cartogram PRIVATE + -Wall # Enable all warnings + -Wextra # Enable extra warnings + -Wpedantic # Enable pedantic warnings +) # ========== Linking Libraries ========== target_link_libraries(cartogram - PkgConfig::fftw - PkgConfig::cairo + PkgConfig::fftw + PkgConfig::cairo ) # ========== Installation ========== @@ -87,10 +93,13 @@ foreach(TEST_FILE ${TEST_FILES}) add_executable(${TEST_NAME} ${TEST_FILE} ${CARTOGRAM_TEST_SOURCES_FROM_SRC}) # Include directories for the test executable - target_include_directories(${TEST_NAME} PUBLIC - ${PROJECT_SOURCE_DIR}/include - ${Boost_INCLUDE_DIRS} - PkgConfig::fftw + target_include_directories(${TEST_NAME} + PUBLIC + ${PROJECT_SOURCE_DIR}/include + SYSTEM + ${Boost_INCLUDE_DIRS} + PkgConfig::fftw + PkgConfig::cairo $<$:/opt/homebrew/include> ) From 5688ac06dbff60d4bf521ceddb2164fe9719376f Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 10 Oct 2024 17:19:45 +0800 Subject: [PATCH 034/100] Update installation instructions, and fix CMake bug --- CMakeLists.txt | 5 +---- README.md | 12 +++++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f572e4a..99db8b01 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,6 @@ cmake_minimum_required(VERSION 3.6) -# We only support macOS (Darwin) 
and Linux -set(SUPPORTED_SYSTEMS "Linux" "Darwin") - -if(CMAKE_SYSTEM_NAME STREQUAL "Linux") +if(UNIX AND NOT APPLE) set(CMAKE_CXX_COMPILER "g++-11") set(CMAKE_C_COMPILER "gcc-11") endif() diff --git a/README.md b/README.md index cf31a6b3..e533484b 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ Data produced by code in this repository are subject to the MIT license found [h ## Dependencies +Please note, we only support UNIX-based systems, and have only tested on macOS, Linux, and GNU. + ### macOS #### Installing Homebrew @@ -24,23 +26,23 @@ Install [homebrew](brew.sh) by running the following command: Install llvm, pkg-config, boost, fftw, cgal, nlohmann-json, and cmake by running the following command: - brew install llvm@17 libomp pkg-config boost fftw cgal nlohmann-json cmake cairo matplotplusplus + brew install libomp pkg-config boost fftw cgal nlohmann-json cmake cairo ### Debian-based distributions (Ubuntu, Arch Linux etc.) -#### Installing GNU g++-13 +#### Installing GNU g++-11 Run the following commands to install it: sudo apt install build-essential manpages-dev software-properties-common sudo add-apt-repository ppa:ubuntu-toolchain-r/test - sudo apt update && sudo apt install gcc-13 g++-13 + sudo apt update && sudo apt install gcc-11 g++-11 #### Installing dependencies through apt -Install nlohmann-json, cgal, openmp, fftw3, cairo, matplot++, boost, and cmake by running the following command: +Install other dependencies available from apt by running the following command: - sudo apt install nlohmann-json3-dev libcgal-dev libomp-dev libfftw3-dev libcairo2-dev libmatplot++-dev libboost-all-dev cmake + cat apt-requirements.txt | xargs sudo apt install ### Installation From 234d9bae857b4b095f3059e2357ca64e00b82889 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 10 Oct 2024 17:36:38 +0800 Subject: [PATCH 035/100] Update `target_include_directories` based on SO post 
https://stackoverflow.com/questions/79073519/cmake-system-flag-not-suppressing-warnings-from-boost-header-only-libraries/79073678#79073678 --- CMakeLists.txt | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 99db8b01..9d33f67c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,9 +34,12 @@ add_executable(cartogram ${CARTOGRAM_SOURCES}) # ========== Include Directories ========== target_include_directories(cartogram - PRIVATE + PUBLIC ${PROJECT_SOURCE_DIR}/include - SYSTEM +) + +target_include_directories(cartogram + SYSTEM PUBLIC ${Boost_INCLUDE_DIRS} PkgConfig::fftw PkgConfig::cairo @@ -45,12 +48,6 @@ target_include_directories(cartogram # ========== Compile Options ========== target_compile_options(cartogram PRIVATE -ffp-contract=off) -if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(cartogram PRIVATE -isystem ${Boost_INCLUDE_DIRS}) -else() - # Likely compiling on ubuntu with G++, like in the CI - target_compile_options(cartogram PRIVATE -I ${Boost_INCLUDE_DIRS}) -endif() # Compiler warnings target_compile_options(cartogram PRIVATE @@ -93,7 +90,9 @@ foreach(TEST_FILE ${TEST_FILES}) target_include_directories(${TEST_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include - SYSTEM + ) + target_include_directories(${TEST_NAME} + SYSTEM PUBLIC ${Boost_INCLUDE_DIRS} PkgConfig::fftw PkgConfig::cairo @@ -101,11 +100,6 @@ foreach(TEST_FILE ${TEST_FILES}) ) target_compile_options(${TEST_NAME} PRIVATE -ffp-contract=off) - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(${TEST_NAME} PRIVATE -isystem ${Boost_INCLUDE_DIRS}) - else() - target_compile_options(cartogram PRIVATE -I ${Boost_INCLUDE_DIRS}) - endif() # Compiler warnings for the test executable target_compile_options(${TEST_NAME} PRIVATE -Wall -Wextra -pedantic -Wno-deprecated-declarations) From 0e80c4023caf78fbdfdd3fcf2440d59f90ca72eb Mon Sep 17 00:00:00 2001 From: adisidev 
<64905594+adisidev@users.noreply.github.com> Date: Thu, 10 Oct 2024 17:41:25 +0800 Subject: [PATCH 036/100] More updates based on SO post. Thank you! https://stackoverflow.com/questions/79073519/cmake-system-flag-not-suppressing-warnings-from-boost-header-only-libraries/79073678#79073678 --- CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d33f67c..9bff72b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,8 +41,6 @@ target_include_directories(cartogram target_include_directories(cartogram SYSTEM PUBLIC ${Boost_INCLUDE_DIRS} - PkgConfig::fftw - PkgConfig::cairo $<$:/opt/homebrew/include> ) @@ -94,8 +92,6 @@ foreach(TEST_FILE ${TEST_FILES}) target_include_directories(${TEST_NAME} SYSTEM PUBLIC ${Boost_INCLUDE_DIRS} - PkgConfig::fftw - PkgConfig::cairo $<$:/opt/homebrew/include> ) From c9dff97a7104fbf1446d57eb39147905d7b4dc7e Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 10 Oct 2024 17:45:46 +0800 Subject: [PATCH 037/100] Run build.yml only on main --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e9fa75b9..ef7471f0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,7 +4,7 @@ on: # when a push is made to the main branch (like when a pull request is merged, or something is pushed directly) workflow_dispatch: push: - branches: [ "main", "pr/adisidev/201" ] + branches: [ "main" ] env: BUILD_TYPE: Release From 2eefaecac34921a4275b405402cd5c175c2477b0 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:38:00 +0800 Subject: [PATCH 038/100] Add CGAL as a submodule --- .gitmodules | 3 +++ external/cgal | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 external/cgal diff --git a/.gitmodules b/.gitmodules new file mode 100644 
index 00000000..00639f9e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/cgal"] + path = external/cgal + url = https://github.com/CGAL/cgal.git diff --git a/external/cgal b/external/cgal new file mode 160000 index 00000000..3deebceb --- /dev/null +++ b/external/cgal @@ -0,0 +1 @@ +Subproject commit 3deebceb97bc7c1026622113315b0a311a1e4bb5 From a4b08b1b1cb013f4e82f6995d2ef39dc56f374a1 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:38:34 +0800 Subject: [PATCH 039/100] Switch to previous major CGAL release --- external/cgal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/cgal b/external/cgal index 3deebceb..188e51ba 160000 --- a/external/cgal +++ b/external/cgal @@ -1 +1 @@ -Subproject commit 3deebceb97bc7c1026622113315b0a311a1e4bb5 +Subproject commit 188e51bad36ffc30e49dbabda29620b71a84664c From 034b04ca721d7f93ef3f49c5aac9c6f5ab2acc0e Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:39:34 +0800 Subject: [PATCH 040/100] Move csv.hpp to external --- {include => external}/csv.hpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {include => external}/csv.hpp (100%) diff --git a/include/csv.hpp b/external/csv.hpp similarity index 100% rename from include/csv.hpp rename to external/csv.hpp From ab8c45c65f6be38d7efb1d85c8853d1f78676e5a Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:01:35 +0800 Subject: [PATCH 041/100] cgal --> CGAL --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 00639f9e..988a5c24 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "external/cgal"] - path = external/cgal + path = external/CGAL url = https://github.com/CGAL/cgal.git From 61f79a3315cb930cb969866e9d7bb6a717db7738 Mon Sep 17 00:00:00 2001 From: adisidev 
<64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:11:27 +0800 Subject: [PATCH 042/100] Revert "cgal --> CGAL" This reverts commit ab8c45c65f6be38d7efb1d85c8853d1f78676e5a. --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 988a5c24..00639f9e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "external/cgal"] - path = external/CGAL + path = external/cgal url = https://github.com/CGAL/cgal.git From 36c9bd4b8464f7caa92fb38605b13ae48ed20ffb Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:20:30 +0800 Subject: [PATCH 043/100] Direct CMake to local CGAL, not system CGAL --- CMakeLists.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9bff72b5..67f7451f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,10 @@ endif() # ========== Dependencies Setup ========== +# Direct CMake to local CGAL installation +set(CGAL_DIR ${PROJECT_SOURCE_DIR}/external/cgal) +find_package(CGAL REQUIRED) + # Boost find_package(Boost REQUIRED COMPONENTS unit_test_framework) @@ -40,8 +44,9 @@ target_include_directories(cartogram target_include_directories(cartogram SYSTEM PUBLIC + ${CGAL_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} - $<$:/opt/homebrew/include> + ${PROJECT_SOURCE_DIR}/external ) # ========== Compile Options ========== @@ -91,8 +96,9 @@ foreach(TEST_FILE ${TEST_FILES}) ) target_include_directories(${TEST_NAME} SYSTEM PUBLIC + ${CGAL_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} - $<$:/opt/homebrew/include> + ${PROJECT_SOURCE_DIR}/external ) target_compile_options(${TEST_NAME} PRIVATE -ffp-contract=off) From 6feb7682c8ed5ffa47cf0fc13d71df04329d7585 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:29:15 +0800 Subject: [PATCH 044/100] Update instructions to not install CGAL --- README.md | 18 
++++++------------ apt-requirements.txt | 3 ++- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e533484b..405698b5 100644 --- a/README.md +++ b/README.md @@ -24,26 +24,20 @@ Install [homebrew](brew.sh) by running the following command: #### Installing dependencies through Homebrew -Install llvm, pkg-config, boost, fftw, cgal, nlohmann-json, and cmake by running the following command: +Install pkg-config, boost, fftw, nlohmann-json, and cmake by running the following command: - brew install libomp pkg-config boost fftw cgal nlohmann-json cmake cairo + brew install libomp pkg-config boost fftw nlohmann-json cmake cairo ### Debian-based distributions (Ubuntu, Arch Linux etc.) -#### Installing GNU g++-11 +#### Installing relevant dependencies through apt: -Run the following commands to install it: - - sudo apt install build-essential manpages-dev software-properties-common - sudo add-apt-repository ppa:ubuntu-toolchain-r/test - sudo apt update && sudo apt install gcc-11 g++-11 - -#### Installing dependencies through apt - -Install other dependencies available from apt by running the following command: +Have a look through to apt-requirements.txt if you'd like to see what all will be installed. Then, run the following commands to install all dependencies through apt: + sudo apt update && add-apt-repository ppa:ubuntu-toolchain-r/test cat apt-requirements.txt | xargs sudo apt install + ### Installation Go to the `cartogram-cpp` directory in your preferred terminal and execute the following commands. 
diff --git a/apt-requirements.txt b/apt-requirements.txt index ee202b4f..5290d540 100644 --- a/apt-requirements.txt +++ b/apt-requirements.txt @@ -1,8 +1,9 @@ +gcc-11 +g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev -libcgal-dev libomp-dev libfftw3-dev libcairo2-dev From 4002a099d082c0908f24daef7d74c9df7c040032 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:30:22 +0800 Subject: [PATCH 045/100] Update build.yml to mirror README.md --- .github/workflows/build.yml | 6 ++---- README.md | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ef7471f0..8775c83f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,10 +30,8 @@ jobs: - name: Install Dependencies run: | - sudo apt update - sudo apt install -y build-essential manpages-dev software-properties-common nlohmann-json3-dev libcgal-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - sudo apt update && sudo apt install -y gcc-11 g++-11 + sudo apt update && add-apt-repository ppa:ubuntu-toolchain-r/test + cat apt-requirements.txt | xargs sudo apt install -y # - name: Save APT cache # uses: actions/cache@v3 diff --git a/README.md b/README.md index 405698b5..2abe9078 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Install pkg-config, boost, fftw, nlohmann-json, and cmake by running the followi Have a look through to apt-requirements.txt if you'd like to see what all will be installed. 
Then, run the following commands to install all dependencies through apt: sudo apt update && add-apt-repository ppa:ubuntu-toolchain-r/test - cat apt-requirements.txt | xargs sudo apt install + cat apt-requirements.txt | xargs sudo apt install -y ### Installation From 51cdb4a7481c4b0c2cf8bfd065ac451e9d6f35eb Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:33:42 +0800 Subject: [PATCH 046/100] Add instruction to use `--recurse-submodules` --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 2abe9078..9fedd19b 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,10 @@ Gastner MT, Seguy V, More P. _Fast flow-based algorithm for creating density-equ Data produced by code in this repository are subject to the MIT license found [here](./LICENSE) and should cite the aforementioned paper by Gastner et al. (2018). +While cloning this repository, please ensure you use the `--recurse-submodules` flag like so: + + git clone --recurse-submodules https://github.com/mgastner/cartogram-cpp.git + ## Dependencies Please note, we only support UNIX-based systems, and have only tested on macOS, Linux, and GNU. 
From c72923660ebcdbbfdf179c82eadd162a9e10f080 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:45:10 +0800 Subject: [PATCH 047/100] Add branch as trigger for testing --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8775c83f..04ce0114 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,7 +4,7 @@ on: # when a push is made to the main branch (like when a pull request is merged, or something is pushed directly) workflow_dispatch: push: - branches: [ "main" ] + branches: [ "main", "pr/adisidev/201" ] env: BUILD_TYPE: Release From 132624aee495415bc8b1768b553c4fd7b2bb8049 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:46:55 +0800 Subject: [PATCH 048/100] Split adpt update and add-apt-repo commands --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 04ce0114..b73cb4a9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,8 @@ jobs: - name: Install Dependencies run: | - sudo apt update && add-apt-repository ppa:ubuntu-toolchain-r/test + sudo apt update + sudo add-apt-repository ppa:ubuntu-toolchain-r/test cat apt-requirements.txt | xargs sudo apt install -y # - name: Save APT cache From 58d0619eca0bacdfa36b1f09f53b6c3f0e14ff4d Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:55:15 +0800 Subject: [PATCH 049/100] Cache dependencies --- .github/workflows/build.yml | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b73cb4a9..a91cc092 100644 --- a/.github/workflows/build.yml +++ 
b/.github/workflows/build.yml @@ -15,30 +15,22 @@ jobs: runs-on: ubuntu-latest steps: + - name: Install dependencies + uses: awalsh128/cache-apt-pkgs-action@latest + with: + packages: gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake + version: 1.0 + - name: Checkout Repository uses: actions/checkout@v4 with: fetch-depth: 0 - # - name: Cache APT packages - # uses: actions/cache@v3 - # with: - # path: /var/cache/apt - # key: ${{ runner.os }}-apt-${{ hashFiles('apt-dependencies.txt') }} - # restore-keys: | - # ${{ runner.os }}-apt- - - - name: Install Dependencies - run: | - sudo apt update - sudo add-apt-repository ppa:ubuntu-toolchain-r/test - cat apt-requirements.txt | xargs sudo apt install -y - - # - name: Save APT cache - # uses: actions/cache@v3 - # with: - # path: /var/cache/apt - # key: ${{ runner.os }}-apt-${{ hashFiles('apt-dependencies.txt') }} + # - name: Install Dependencies + # run: | + # sudo apt update + # sudo add-apt-repository ppa:ubuntu-toolchain-r/test + # cat apt-requirements.txt | xargs sudo apt install -y - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} From 8742ee07c35361b0fe5ef9dd35ddc63e85e6cd3a Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:00:14 +0800 Subject: [PATCH 050/100] Don't output new_tag to GITHUB_OUTPUT --- .github/workflows/build.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a91cc092..0ee98267 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -79,8 +79,6 @@ jobs: # Push the tag to the repository git push origin "${short_sha}" - - echo "new_tag=${short_sha}" >> $GITHUB_OUTPUT env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 8480a9a211b2e12e5680e33db3dae9e53507a4fa Mon Sep 17 00:00:00 2001 From: adisidev 
<64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:02:21 +0800 Subject: [PATCH 051/100] Change CGAL_DIR --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 67f7451f..93a40804 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ endif() # ========== Dependencies Setup ========== # Direct CMake to local CGAL installation -set(CGAL_DIR ${PROJECT_SOURCE_DIR}/external/cgal) +set(CGAL_DIR ${PROJECT_SOURCE_DIR}/external/cgal/Installation/lib/cmake/CGAL) find_package(CGAL REQUIRED) # Boost From 23f1b61e5127a292805867c9fbaa7747fd5e40e2 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:07:01 +0800 Subject: [PATCH 052/100] Update CMAKE_PREFIX_PATH --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93a40804..34b8a282 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,7 @@ endif() # ========== Dependencies Setup ========== # Direct CMake to local CGAL installation +set(CMAKE_PREFIX_PATH ${PROJECT_SOURCE_DIR}/external/cgal/Installation/lib/cmake/CGAL) set(CGAL_DIR ${PROJECT_SOURCE_DIR}/external/cgal/Installation/lib/cmake/CGAL) find_package(CGAL REQUIRED) From e1cab29ddc08137e8a64d7257974c6189ab1bf17 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:10:43 +0800 Subject: [PATCH 053/100] Add submodules: 'recursive' property to action --- .github/workflows/build.yml | 1 + CMakeLists.txt | 3 +-- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0ee98267..c1ebc098 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,6 +24,7 @@ jobs: - name: Checkout Repository uses: actions/checkout@v4 with: + submodules: 'recursive' fetch-depth: 0 # - name: Install Dependencies diff --git 
a/CMakeLists.txt b/CMakeLists.txt index 34b8a282..67f7451f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,8 +21,7 @@ endif() # ========== Dependencies Setup ========== # Direct CMake to local CGAL installation -set(CMAKE_PREFIX_PATH ${PROJECT_SOURCE_DIR}/external/cgal/Installation/lib/cmake/CGAL) -set(CGAL_DIR ${PROJECT_SOURCE_DIR}/external/cgal/Installation/lib/cmake/CGAL) +set(CGAL_DIR ${PROJECT_SOURCE_DIR}/external/cgal) find_package(CGAL REQUIRED) # Boost diff --git a/README.md b/README.md index 9fedd19b..63ec9247 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Gastner MT, Seguy V, More P. _Fast flow-based algorithm for creating density-equ Data produced by code in this repository are subject to the MIT license found [here](./LICENSE) and should cite the aforementioned paper by Gastner et al. (2018). While cloning this repository, please ensure you use the `--recurse-submodules` flag like so: - +- git clone --recurse-submodules https://github.com/mgastner/cartogram-cpp.git ## Dependencies From c952414ab4f93d02d385387443a3c1fae37c66e1 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:22:26 +0800 Subject: [PATCH 054/100] Add CGAL dependencies to build.yml --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c1ebc098..edff4c6f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,7 +18,7 @@ jobs: - name: Install dependencies uses: awalsh128/cache-apt-pkgs-action@latest with: - packages: gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake + packages: gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev version: 1.0 - name: 
Checkout Repository From dad1a533efb188621c90a1ce8c3d8475448e6216 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:32:36 +0800 Subject: [PATCH 055/100] Revert "Don't output new_tag to GITHUB_OUTPUT" This reverts commit 8742ee07c35361b0fe5ef9dd35ddc63e85e6cd3a. --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index edff4c6f..75f12266 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -80,6 +80,8 @@ jobs: # Push the tag to the repository git push origin "${short_sha}" + + echo "new_tag=${short_sha}" >> $GITHUB_OUTPUT env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 9c462a296bd45249ca4acc7db067293e3d240e71 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:34:28 +0800 Subject: [PATCH 056/100] Update apt-requirements.txt --- apt-requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apt-requirements.txt b/apt-requirements.txt index 5290d540..5fd38b31 100644 --- a/apt-requirements.txt +++ b/apt-requirements.txt @@ -8,4 +8,7 @@ libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev -cmake \ No newline at end of file +cmake +libgmp3-dev +libmpfr-dev +libeigen3-dev \ No newline at end of file From 89e0ceb0a8ecb3312738cdc389eb1dee7a698a7e Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:50:00 +0800 Subject: [PATCH 057/100] Update cmake version requirement --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 67f7451f..d9b27b60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.6) +cmake_minimum_required(VERSION 3.30) if(UNIX AND NOT APPLE) set(CMAKE_CXX_COMPILER "g++-11") From 356022dd270342662d4c7600e185a5021018a116 Mon Sep 
17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:53:07 +0800 Subject: [PATCH 058/100] Change apt cache version --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 75f12266..9c84b592 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,7 @@ jobs: uses: awalsh128/cache-apt-pkgs-action@latest with: packages: gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev - version: 1.0 + version: 1.1 - name: Checkout Repository uses: actions/checkout@v4 From fcc787f35246e4f072607db6cbbc680b9624914e Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 16 Oct 2024 10:13:56 +0800 Subject: [PATCH 059/100] Run github action on container --- .github/workflows/build.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9c84b592..b62992e0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,13 +13,17 @@ jobs: build-and-release: runs-on: ubuntu-latest + container: + image: gocartio/cartogram-web:latest steps: - - name: Install dependencies - uses: awalsh128/cache-apt-pkgs-action@latest - with: - packages: gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev - version: 1.1 + + # Dependencies should already be installed in the container + # - name: Install dependencies + # uses: awalsh128/cache-apt-pkgs-action@latest + # with: + # packages: gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev 
libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev + # version: 1.1 - name: Checkout Repository uses: actions/checkout@v4 From 38e53b170acacd2697f48c1764edc518b0c17e0c Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:32:33 +0800 Subject: [PATCH 060/100] Share .vscode settings --- .gitignore | 3 -- .vscode/settings.json | 78 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index 7f46a652..82e5392b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,9 +9,6 @@ cartogram /*.geojson /sample_data/*.geojson -# Ignore files generated by Visual Studio Code -.vscode - # Ignore DS_Store files created by macOS **/.DS_Store **/.cache* diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..ba5f7491 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,78 @@ +{ + "files.associations": { + "__config": "cpp", + "__verbose_abort": "cpp", + "array": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdlib": "cpp", + "initializer_list": "cpp", + "limits": "cpp", + "numbers": "cpp", + "concepts": "cpp", + "algorithm": "cpp", + "type_traits": "cpp", + "__hash_table": "cpp", + "__split_buffer": "cpp", + "__tree": "cpp", + "atomic": "cpp", + "deque": "cpp", + "hash_map": "cpp", + "forward_list": "cpp", + "ios": "cpp", + "list": "cpp", + "map": "cpp", + "set": "cpp", + "string": "cpp", + "system_error": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "vector": "cpp", + "*.tcc": "cpp", + "any": "cpp", + "cmath": "cpp", + "cstdio": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "bit": "cpp", + "cctype": "cpp", + "charconv": "cpp", + "chrono": "cpp", + "compare": "cpp", + "exception": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": 
"cpp", + "random": "cpp", + "ratio": "cpp", + "string_view": "cpp", + "tuple": "cpp", + "utility": "cpp", + "format": "cpp", + "fstream": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "new": "cpp", + "ostream": "cpp", + "span": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "cinttypes": "cpp", + "typeinfo": "cpp", + "variant": "cpp", + "__bit_reference": "cpp", + "__threading_support": "cpp", + "execution": "cpp" + }, + "files.exclude": { + "**/cgal": true + } +} \ No newline at end of file From 37dc440371811d87dfd9eb5eec533e46509f4c7d Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:29:52 +0800 Subject: [PATCH 061/100] Add .geojson in write_geojson instead of main.cpp --- src/cartogram_info/write_geojson.cpp | 6 +++--- src/main.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cartogram_info/write_geojson.cpp b/src/cartogram_info/write_geojson.cpp index 7a3f17f2..a3a13bdd 100644 --- a/src/cartogram_info/write_geojson.cpp +++ b/src/cartogram_info/write_geojson.cpp @@ -177,7 +177,7 @@ void CartogramInfo::write_geojson( const std::string &new_geo_file_name, const bool output_to_stdout) { - std::cerr << "Writing " << new_geo_file_name << std::endl; + std::cerr << "Writing " << new_geo_file_name << ".geojson" << std::endl; std::ifstream old_file(old_geo_file_name); nlohmann::json old_json; old_file >> old_json; @@ -191,9 +191,9 @@ void CartogramInfo::write_geojson( nlohmann::json combined_json; combined_json["Simplified"] = new_json; combined_json["Original"] = new_json_original; - std::cout << combined_json << std::endl; + // std::cout << combined_json << std::endl; } else { - std::ofstream o(new_geo_file_name); + std::ofstream o(new_geo_file_name + ".geojson"); o << new_json << std::endl; } } diff --git a/src/main.cpp b/src/main.cpp index 205da616..d61054ce 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ 
-159,7 +159,7 @@ int main(const int argc, const char *argv[]) // Output to GeoJSON cart_info.write_geojson( geo_file_name, - map_name + "_equal_area.geojson", + map_name + "_equal_area", output_to_stdout); return EXIT_SUCCESS; } @@ -194,7 +194,7 @@ int main(const int argc, const char *argv[]) // Output rescaled GeoJSON cart_info.write_geojson( geo_file_name, - map_name + "_input.geojson", + map_name + "_input", output_to_stdout); // Set up Fourier transforms @@ -386,7 +386,7 @@ int main(const int argc, const char *argv[]) if (world) { cart_info.write_geojson( geo_file_name, - map_name + "_cartogram_in_smyth_projection.geojson", + map_name + "_cartogram_in_smyth_projection", output_to_stdout); inset_state.revert_smyth_craster_projection(); } @@ -422,7 +422,7 @@ int main(const int argc, const char *argv[]) // Output to GeoJSON cart_info.write_geojson( geo_file_name, - map_name + "_cartogram.geojson", + map_name + "_cartogram", output_to_stdout); // Stop of main function time From 788ffd1ac28d4cf4716ded9e715af652ee15050c Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:34:28 +0800 Subject: [PATCH 062/100] Move external libraries to external folder --- {include => external}/argparse.hpp | 0 {include => external}/indicators.hpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename {include => external}/argparse.hpp (100%) rename {include => external}/indicators.hpp (99%) diff --git a/include/argparse.hpp b/external/argparse.hpp similarity index 100% rename from include/argparse.hpp rename to external/argparse.hpp diff --git a/include/indicators.hpp b/external/indicators.hpp similarity index 99% rename from include/indicators.hpp rename to external/indicators.hpp index 3832d665..857e25b7 100644 --- a/include/indicators.hpp +++ b/external/indicators.hpp @@ -1666,7 +1666,7 @@ static inline std::wstring utf8_decode(const std::string& s) { setlocale(LC_ALL, curLocale.c_str()); return result; } -#else +#else 
static inline std::wstring utf8_decode(const std::string& s) { auto r = setlocale(LC_ALL, ""); std::string curLocale; From 4420edb2b986e8cb94edf7c51f94f04278a1c97e Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:34:42 +0800 Subject: [PATCH 063/100] Ignore all external libraries in vscode --- .vscode/settings.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index ba5f7491..7cbcc373 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -73,6 +73,6 @@ "execution": "cpp" }, "files.exclude": { - "**/cgal": true + "**/external": true } } \ No newline at end of file From ba8b62b684dfa58d198215f3bee3be50ba9ecd0b Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:35:35 +0800 Subject: [PATCH 064/100] std::cerr --> std::cout --- src/inset_state/inset_state.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inset_state/inset_state.cpp b/src/inset_state/inset_state.cpp index cd58a931..4eef1f8f 100644 --- a/src/inset_state/inset_state.cpp +++ b/src/inset_state/inset_state.cpp @@ -182,7 +182,7 @@ bool InsetState::insert_constraint_safely(const Point &p1, const Point &p2) proj_qd_.dt.insert_constraint(p1, p2); return true; } catch (const std::exception &e) { - std::cout << "WARNING DIAGONAL: Could not insert constraint between " << p1 + std::cerr << "WARNING DIAGONAL: Could not insert constraint between " << p1 << " and " << p2 << std::endl; std::cerr << e.what() << std::endl; // Add to the list of failed constraints From 67191fa56057e44474304ca5ef85430b6c21961c Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:52:20 +0800 Subject: [PATCH 065/100] Compile stdout before printing all at once at end --- include/cartogram_info.hpp | 2 ++ src/cartogram_info/cartogram_info.cpp | 5 +++++ 
src/cartogram_info/write_geojson.cpp | 2 +- src/main.cpp | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/cartogram_info.hpp b/include/cartogram_info.hpp index 6fe66bd6..2a96be3c 100644 --- a/include/cartogram_info.hpp +++ b/include/cartogram_info.hpp @@ -11,6 +11,7 @@ class CartogramInfo std::string id_header_; std::set ids_in_visual_variables_file_; std::map inset_states_; + nlohmann::json stdout_json; bool is_world_map_; std::string map_name_; @@ -33,6 +34,7 @@ class CartogramInfo const nlohmann::json &); [[nodiscard]] unsigned int n_geo_divs() const; [[nodiscard]] unsigned int n_insets() const; + void output_to_stdout() const; void read_csv(const argparse::ArgumentParser &); void read_geojson(const std::string &, bool, std::string &); std::map &ref_to_inset_states(); diff --git a/src/cartogram_info/cartogram_info.cpp b/src/cartogram_info/cartogram_info.cpp index 48ffa5a8..b8af3818 100644 --- a/src/cartogram_info/cartogram_info.cpp +++ b/src/cartogram_info/cartogram_info.cpp @@ -56,6 +56,11 @@ unsigned int CartogramInfo::n_insets() const return inset_states_.size(); } +void CartogramInfo::output_to_stdout() const +{ + std::cout << stdout_json << std::endl; +} + std::map &CartogramInfo::ref_to_inset_states() { return inset_states_; diff --git a/src/cartogram_info/write_geojson.cpp b/src/cartogram_info/write_geojson.cpp index a3a13bdd..3b2e7e1e 100644 --- a/src/cartogram_info/write_geojson.cpp +++ b/src/cartogram_info/write_geojson.cpp @@ -191,7 +191,7 @@ void CartogramInfo::write_geojson( nlohmann::json combined_json; combined_json["Simplified"] = new_json; combined_json["Original"] = new_json_original; - // std::cout << combined_json << std::endl; + stdout_json[new_geo_file_name] = combined_json; } else { std::ofstream o(new_geo_file_name + ".geojson"); o << new_json << std::endl; diff --git a/src/main.cpp b/src/main.cpp index d61054ce..c120416c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -425,6 +425,9 @@ int main(const int argc, 
const char *argv[]) map_name + "_cartogram", output_to_stdout); + // Write final JSON to stdout, if requested + if (output_to_stdout) cart_info.output_to_stdout(); + // Stop of main function time time_tracker.stop("Total Time"); From b85a2cab0e7b41b23d0a19b3777d2c652beb9a5d Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:56:49 +0800 Subject: [PATCH 066/100] Attempt run on docker container --- .github/workflows/build.yml | 14 +++++++------- apt-requirements.txt | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b62992e0..b982758b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,11 +18,17 @@ jobs: steps: + - name: Install Dependencies + run: | + apt update -y + add-apt-repository ppa:ubuntu-toolchain-r/test + cat apt-requirements.txt | xargs apt install -y + # Dependencies should already be installed in the container # - name: Install dependencies # uses: awalsh128/cache-apt-pkgs-action@latest # with: - # packages: gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev + # packages: gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev git # version: 1.1 - name: Checkout Repository @@ -31,12 +37,6 @@ jobs: submodules: 'recursive' fetch-depth: 0 - # - name: Install Dependencies - # run: | - # sudo apt update - # sudo add-apt-repository ppa:ubuntu-toolchain-r/test - # cat apt-requirements.txt | xargs sudo apt install -y - - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} diff --git a/apt-requirements.txt b/apt-requirements.txt index 5fd38b31..40364bd2 100644 --- 
a/apt-requirements.txt +++ b/apt-requirements.txt @@ -1,3 +1,4 @@ +git gcc-11 g++-11 build-essential From c6630fcda4133a9187b55ad0b4c43b63adc45981 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:39:17 +0800 Subject: [PATCH 067/100] Don't add ubuntu-toolchain-r/test --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b982758b..d9c07a88 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,7 +21,7 @@ jobs: - name: Install Dependencies run: | apt update -y - add-apt-repository ppa:ubuntu-toolchain-r/test + # add-apt-repository ppa:ubuntu-toolchain-r/test cat apt-requirements.txt | xargs apt install -y # Dependencies should already be installed in the container From f9e146ba9d3986e8fa524d3830106f876de9bd00 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:41:45 +0800 Subject: [PATCH 068/100] Install packages without xargs apt install -y --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d9c07a88..f38d4524 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,7 +22,8 @@ jobs: run: | apt update -y # add-apt-repository ppa:ubuntu-toolchain-r/test - cat apt-requirements.txt | xargs apt install -y + # cat apt-requirements.txt | xargs apt install -y + apt install -y git gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev # Dependencies should already be installed in the container # - name: Install dependencies From 40022da49f3e1230f69d2afb2a05d6033784d17e Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: 
Thu, 17 Oct 2024 21:49:07 +0800 Subject: [PATCH 069/100] Change minimum required version of CMake to 3.25 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d9b27b60..8396ad56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.30) +cmake_minimum_required(VERSION 3.25) if(UNIX AND NOT APPLE) set(CMAKE_CXX_COMPILER "g++-11") From 580c1ed7d92a83ba6d8d18ecd820f0f2f031dca4 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:56:06 +0800 Subject: [PATCH 070/100] Specify directory containing bin --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f38d4524..924620ed 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -62,7 +62,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: cartogram - path: ./build/bin/cartogram + path: ${{github.workspace}}/build/bin/cartogram - name: Generate and Push Tag id: generate_tag From 31ec78c4f26a93a1718309a6c0e09a2ded978f94 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 22:13:49 +0800 Subject: [PATCH 071/100] Print working directory for docker --- .github/workflows/build.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 924620ed..872f076a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -58,11 +58,15 @@ jobs: # chmod +x stress_test.sh # bash stress_test.sh + - name: Figure out working directory + run: | + pwd + - name: Upload Artifact uses: actions/upload-artifact@v4 with: name: cartogram - path: ${{github.workspace}}/build/bin/cartogram + path: /usr/local/bin/cartogram - name: Generate and Push Tag id: generate_tag From 
33aa3a867661bcd1b829a27cbf911afe0ed81290 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 17 Oct 2024 22:28:54 +0800 Subject: [PATCH 072/100] Remove git --local tags --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 872f076a..ac021990 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -72,8 +72,8 @@ jobs: id: generate_tag run: | # Configure git committer - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" + git config user.email "action@github.com" + git config user.name "GitHub Action" # Fetch tags git fetch --tags From 618ec495f674d8be8b4649f12d1a0eec1b18ae9d Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:07:18 +0800 Subject: [PATCH 073/100] Implement Copilot suggestions --- .github/workflows/build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ac021990..38b52554 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: apt update -y # add-apt-repository ppa:ubuntu-toolchain-r/test # cat apt-requirements.txt | xargs apt install -y - apt install -y git gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev + apt install -y git gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev tree dbus polkitd # Dependencies should already be installed in the container # - name: Install dependencies @@ -61,6 +61,8 @@ jobs: - name: Figure out working directory run: | pwd + ls + tree - name: 
Upload Artifact uses: actions/upload-artifact@v4 From e69302d2b68682ec6e96de576e3931b09a3d1916 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 14:55:36 +0800 Subject: [PATCH 074/100] Reduce dependencies, don't fetch --tags --- .github/workflows/build.yml | 17 ++--------------- CMakeLists.txt | 1 - 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 38b52554..95ad9317 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,17 +20,7 @@ jobs: - name: Install Dependencies run: | - apt update -y - # add-apt-repository ppa:ubuntu-toolchain-r/test - # cat apt-requirements.txt | xargs apt install -y - apt install -y git gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev tree dbus polkitd - - # Dependencies should already be installed in the container - # - name: Install dependencies - # uses: awalsh128/cache-apt-pkgs-action@latest - # with: - # packages: gcc-11 g++-11 build-essential manpages-dev software-properties-common nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev libboost-all-dev cmake libgmp3-dev libmpfr-dev libeigen3-dev git - # version: 1.1 + apt install -y git g++-11 build-essential cmake libboost-all-dev - name: Checkout Repository uses: actions/checkout@v4 @@ -61,7 +51,7 @@ jobs: - name: Figure out working directory run: | pwd - ls + ls -a tree - name: Upload Artifact @@ -77,9 +67,6 @@ jobs: git config user.email "action@github.com" git config user.name "GitHub Action" - # Fetch tags - git fetch --tags - # Get the current date current_date=$(date +'%Y-%m-%d') diff --git a/CMakeLists.txt b/CMakeLists.txt index 8396ad56..9a8767aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.25) if(UNIX AND NOT APPLE) 
set(CMAKE_CXX_COMPILER "g++-11") - set(CMAKE_C_COMPILER "gcc-11") endif() project(cartogram LANGUAGES CXX) From d2a7cd3470f0def248557eff509402f92347cedb Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:29:59 +0800 Subject: [PATCH 075/100] Track maximum progress reached --- include/progress_tracker.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/progress_tracker.hpp b/include/progress_tracker.hpp index 4de72f62..40cd4732 100644 --- a/include/progress_tracker.hpp +++ b/include/progress_tracker.hpp @@ -26,6 +26,7 @@ class ProgressTracker private: double total_geo_divs_; // Total number of GeoDivs to monitor progress double progress_; // Progress measured on a scale from 0 (start) to 1 (end) + double max_progress_; // Maximum progress value ever reached indicators::ProgressBar bar_; }; From 894ddeb7d7df9ae33891e8ee212c1eb834548638 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:30:33 +0800 Subject: [PATCH 076/100] Always increment progress by minimum amount --- src/misc/progress_tracker.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/misc/progress_tracker.cpp b/src/misc/progress_tracker.cpp index 70052186..b98f14be 100644 --- a/src/misc/progress_tracker.cpp +++ b/src/misc/progress_tracker.cpp @@ -40,6 +40,11 @@ void ProgressTracker::print_progress_mid_integration( // finished insets const double inset_max_frac = inset_state.n_geo_divs() / total_geo_divs_; double progress = progress_ + (inset_max_frac / n_predicted_integrations); + + // Increase max_progress by 0.5% at least + progress = std::max(progress, max_progress_ + 0.005); + + max_progress_ = progress; print_progress(progress); print_progress_bar(progress); } @@ -49,6 +54,7 @@ void ProgressTracker::print_progress_mid_integration( void ProgressTracker::update_and_print_progress_end_integration( const InsetState &inset_state) { + max_progress_ = 0; const double 
inset_max_frac = inset_state.n_geo_divs() / total_geo_divs_; progress_ += inset_max_frac; print_progress(progress_); From d2b522319d88a1601f8afc6cfa8b6edf0fd3d1a2 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:30:52 +0800 Subject: [PATCH 077/100] Prevent progress bar from reaching 100 prematurely --- src/misc/progress_tracker.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/misc/progress_tracker.cpp b/src/misc/progress_tracker.cpp index b98f14be..ef686420 100644 --- a/src/misc/progress_tracker.cpp +++ b/src/misc/progress_tracker.cpp @@ -41,6 +41,9 @@ void ProgressTracker::print_progress_mid_integration( const double inset_max_frac = inset_state.n_geo_divs() / total_geo_divs_; double progress = progress_ + (inset_max_frac / n_predicted_integrations); + // Leave buffer at end so that we don't reach 100% prematurely + progress = std::min(progress, 0.935); + // Increase max_progress by 0.5% at least progress = std::max(progress, max_progress_ + 0.005); From e6d3982426698c20a571917fe888c4795fcee381 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:38:26 +0800 Subject: [PATCH 078/100] Use dynamic progress increment --- src/misc/progress_tracker.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/misc/progress_tracker.cpp b/src/misc/progress_tracker.cpp index ef686420..057d45a3 100644 --- a/src/misc/progress_tracker.cpp +++ b/src/misc/progress_tracker.cpp @@ -41,11 +41,15 @@ void ProgressTracker::print_progress_mid_integration( const double inset_max_frac = inset_state.n_geo_divs() / total_geo_divs_; double progress = progress_ + (inset_max_frac / n_predicted_integrations); + // Change how much progress increases by, so it never reaches 100 here + double remaining_progress = 1.0 - max_progress_; + double dynamic_increment = remaining_progress * 0.1; + // Leave buffer at end so that we don't reach 100% 
prematurely progress = std::min(progress, 0.935); // Increase max_progress by 0.5% at least - progress = std::max(progress, max_progress_ + 0.005); + progress = std::max(progress, max_progress_ + dynamic_increment); max_progress_ = progress; print_progress(progress); From 1b90bad46fb5c4c0e55d9839a57af3accc31c27c Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:48:40 +0800 Subject: [PATCH 079/100] Temper progress bar at start, make buffer 0.75 --- src/misc/progress_tracker.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/misc/progress_tracker.cpp b/src/misc/progress_tracker.cpp index 057d45a3..e17cc41e 100644 --- a/src/misc/progress_tracker.cpp +++ b/src/misc/progress_tracker.cpp @@ -46,7 +46,13 @@ void ProgressTracker::print_progress_mid_integration( double dynamic_increment = remaining_progress * 0.1; // Leave buffer at end so that we don't reach 100% prematurely - progress = std::min(progress, 0.935); + progress = std::min(progress, 0.75); + + // Our assumption above causes the progress bar to start at 36%. + // Thus, we temper it down for the first few integrations. 
+ if (inset_state.n_finished_integrations() < 4) { + progress = std::min(progress, max_progress_); + } // Increase max_progress by 0.5% at least progress = std::max(progress, max_progress_ + dynamic_increment); From c71c44200cf433de678b51f97e9e9bdb1c86e196 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:51:30 +0800 Subject: [PATCH 080/100] Fixes #175 and significantly improves pb smoothness --- src/misc/progress_tracker.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/misc/progress_tracker.cpp b/src/misc/progress_tracker.cpp index e17cc41e..c574d3e7 100644 --- a/src/misc/progress_tracker.cpp +++ b/src/misc/progress_tracker.cpp @@ -54,7 +54,8 @@ void ProgressTracker::print_progress_mid_integration( progress = std::min(progress, max_progress_); } - // Increase max_progress by 0.5% at least + // Increase max_progress by dynamic increment that gets smaller + // as we get closer to 100%. progress = std::max(progress, max_progress_ + dynamic_increment); max_progress_ = progress; From fb5674750821a493ab5c2eca3afabe04878565da Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:08:41 +0800 Subject: [PATCH 081/100] Update apt, remove "Figure out working directory" step --- .github/workflows/build.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 95ad9317..1a392d0a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,6 +20,7 @@ jobs: - name: Install Dependencies run: | + apt update -y apt install -y git g++-11 build-essential cmake libboost-all-dev - name: Checkout Repository @@ -47,13 +48,6 @@ jobs: # cd tests/ # chmod +x stress_test.sh # bash stress_test.sh - - - name: Figure out working directory - run: | - pwd - ls -a - tree - - name: Upload Artifact uses: actions/upload-artifact@v4 with: @@ -67,6 +61,9 
@@ jobs: git config user.email "action@github.com" git config user.name "GitHub Action" + pwd + ls -la + # Get the current date current_date=$(date +'%Y-%m-%d') From 3f2aadd2aca6e748d9aac31a8545b02f2a7debd4 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:16:12 +0800 Subject: [PATCH 082/100] Modify docker container environment in actions --- .github/workflows/build.yml | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1a392d0a..e7dd43a1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,15 +6,15 @@ on: push: branches: [ "main", "pr/adisidev/201" ] -env: - BUILD_TYPE: Release - jobs: build-and-release: runs-on: ubuntu-latest container: image: gocartio/cartogram-web:latest + env: + BUILD_TYPE: Release + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: @@ -57,13 +57,13 @@ jobs: - name: Generate and Push Tag id: generate_tag run: | + pwd + ls -la + # Configure git committer git config user.email "action@github.com" git config user.name "GitHub Action" - pwd - ls -la - # Get the current date current_date=$(date +'%Y-%m-%d') @@ -77,14 +77,10 @@ jobs: git push origin "${short_sha}" echo "new_tag=${short_sha}" >> $GITHUB_OUTPUT - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Release uses: softprops/action-gh-release@v2 with: tag_name: ${{ steps.generate_tag.outputs.new_tag }} files: build/bin/cartogram - body: "Rolling release ${{ steps.generate_tag.outputs.release_number }}" - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + body: "Rolling release ${{ steps.generate_tag.outputs.release_number }}" \ No newline at end of file From 30572a95723d1e51abff2f9f9eddc80f07878fdb Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:19:56 +0800 Subject: [PATCH 083/100] Change where .env is defined 
--- .github/workflows/build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e7dd43a1..4638579b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,16 +6,16 @@ on: push: branches: [ "main", "pr/adisidev/201" ] +env: + BUILD_TYPE: Release + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + jobs: build-and-release: runs-on: ubuntu-latest container: image: gocartio/cartogram-web:latest - env: - BUILD_TYPE: Release - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - steps: - name: Install Dependencies @@ -64,7 +64,7 @@ jobs: git config user.email "action@github.com" git config user.name "GitHub Action" - # Get the current date + # # t the current date current_date=$(date +'%Y-%m-%d') # Get the short SHA of the current HEAD From 809580940e310856ab9e0b51798b33164cb22c86 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:32:18 +0800 Subject: [PATCH 084/100] Separate build and release job --- .github/workflows/build.yml | 60 ++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4638579b..f20e3109 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,10 +8,9 @@ on: env: BUILD_TYPE: Release - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} jobs: - build-and-release: + build-and-upload: runs-on: ubuntu-latest container: @@ -54,33 +53,52 @@ jobs: name: cartogram path: /usr/local/bin/cartogram - - name: Generate and Push Tag - id: generate_tag - run: | - pwd - ls -la + release: - # Configure git committer - git config user.email "action@github.com" - git config user.name "GitHub Action" + runs-on: ubuntu-latest + needs: build-and-upload + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 
+ + - name: Download Artifact + uses: actions/download-artifact@v4 + with: + name: cartogram + + - name: Generate and Push Tag + id: generate_tag + run: | + # Configure git committer + git config user.email "action@github.com" + git config user.name "GitHub Action" - # # t the current date - current_date=$(date +'%Y-%m-%d') + # Get the current date + current_date=$(date +'%Y-%m-%d') - # Get the short SHA of the current HEAD - short_sha=$(git rev-parse --short HEAD) + # Get the short SHA of the current HEAD + short_sha=$(git rev-parse --short HEAD) - # Create the new tag - git tag -a "${short_sha}" -m "New release on ${current_date}" + # Create the new tag + git tag -a "${short_sha}" -m "New release on ${current_date}" - # Push the tag to the repository - git push origin "${short_sha}" + # Push the tag to the repository + git push origin "${short_sha}" - echo "new_tag=${short_sha}" >> $GITHUB_OUTPUT + echo "new_tag=${short_sha}" >> $GITHUB_OUTPUT + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Release uses: softprops/action-gh-release@v2 with: tag_name: ${{ steps.generate_tag.outputs.new_tag }} - files: build/bin/cartogram - body: "Rolling release ${{ steps.generate_tag.outputs.release_number }}" \ No newline at end of file + files: cartogram + body: "Rolling release ${{ steps.generate_tag.outputs.new_tag }}" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file From e70c77e7fbc4e5757c090002d7c31d66abf9be8f Mon Sep 17 00:00:00 2001 From: nihalzp <81457724+nihalzp@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:15:45 +0800 Subject: [PATCH 085/100] Only output simplified and original version to stdout --- src/cartogram_info/write_geojson.cpp | 2 +- src/main.cpp | 47 +++++++++++----------------- 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/src/cartogram_info/write_geojson.cpp b/src/cartogram_info/write_geojson.cpp index 3b2e7e1e..a0174239 100644 --- a/src/cartogram_info/write_geojson.cpp +++ 
b/src/cartogram_info/write_geojson.cpp @@ -191,7 +191,7 @@ void CartogramInfo::write_geojson( nlohmann::json combined_json; combined_json["Simplified"] = new_json; combined_json["Original"] = new_json_original; - stdout_json[new_geo_file_name] = combined_json; + std::cout << combined_json << std::endl; } else { std::ofstream o(new_geo_file_name + ".geojson"); o << new_json << std::endl; diff --git a/src/main.cpp b/src/main.cpp index c120416c..b3cb1c0a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -121,21 +121,6 @@ int main(const int argc, const char *argv[]) return EXIT_FAILURE; } - if (simplify) { - std::cerr << "Start of initial simplification of " << inset_pos - << std::endl; - time_tracker.start("Simplification"); - - // Simplification reduces the number of points used to represent the - // GeoDivs in the inset, thereby reducing output file sizes and - // run-times - inset_state.simplify(target_points_per_inset); - - // Update time - time_tracker.stop("Simplification"); - } - std::cerr << "End of initial simplification of " << inset_pos << std::endl; - // End of inset time time_tracker.stop("Inset " + inset_pos); } @@ -160,7 +145,7 @@ int main(const int argc, const char *argv[]) cart_info.write_geojson( geo_file_name, map_name + "_equal_area", - output_to_stdout); + false); return EXIT_SUCCESS; } @@ -191,11 +176,26 @@ int main(const int argc, const char *argv[]) inset_state.store_original_geo_divs(); } + if (simplify) { + std::cerr << "Start of initial simplification of " << inset_pos + << std::endl; + time_tracker.start("Simplification"); + + // Simplification reduces the number of points used to represent the + // GeoDivs in the inset, thereby reducing output file sizes and + // run-times + inset_state.simplify(target_points_per_inset); + + // Update time + time_tracker.stop("Simplification"); + } + std::cerr << "End of initial simplification of " << inset_pos << std::endl; + // Output rescaled GeoJSON cart_info.write_geojson( geo_file_name, - map_name + 
"_input", - output_to_stdout); + map_name + "_input_processed", + false); // Set up Fourier transforms const unsigned int lx = inset_state.lx(); @@ -383,14 +383,6 @@ int main(const int argc, const char *argv[]) plot_grid); } - if (world) { - cart_info.write_geojson( - geo_file_name, - map_name + "_cartogram_in_smyth_projection", - output_to_stdout); - inset_state.revert_smyth_craster_projection(); - } - if (output_to_stdout and !qtdt_method) { inset_state.fill_grid_diagonals(true); inset_state.project_with_cum_proj(); @@ -425,9 +417,6 @@ int main(const int argc, const char *argv[]) map_name + "_cartogram", output_to_stdout); - // Write final JSON to stdout, if requested - if (output_to_stdout) cart_info.output_to_stdout(); - // Stop of main function time time_tracker.stop("Total Time"); From 564e94b8710d09fab2477cc7b3564ddafee4a278 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:50:29 +0800 Subject: [PATCH 086/100] Revert "Compile stdout before printing all at once at end" This reverts commit 67191fa56057e44474304ca5ef85430b6c21961c. 
--- include/cartogram_info.hpp | 2 -- src/cartogram_info/cartogram_info.cpp | 5 ----- 2 files changed, 7 deletions(-) diff --git a/include/cartogram_info.hpp b/include/cartogram_info.hpp index 2a96be3c..6fe66bd6 100644 --- a/include/cartogram_info.hpp +++ b/include/cartogram_info.hpp @@ -11,7 +11,6 @@ class CartogramInfo std::string id_header_; std::set ids_in_visual_variables_file_; std::map inset_states_; - nlohmann::json stdout_json; bool is_world_map_; std::string map_name_; @@ -34,7 +33,6 @@ class CartogramInfo const nlohmann::json &); [[nodiscard]] unsigned int n_geo_divs() const; [[nodiscard]] unsigned int n_insets() const; - void output_to_stdout() const; void read_csv(const argparse::ArgumentParser &); void read_geojson(const std::string &, bool, std::string &); std::map &ref_to_inset_states(); diff --git a/src/cartogram_info/cartogram_info.cpp b/src/cartogram_info/cartogram_info.cpp index b8af3818..48ffa5a8 100644 --- a/src/cartogram_info/cartogram_info.cpp +++ b/src/cartogram_info/cartogram_info.cpp @@ -56,11 +56,6 @@ unsigned int CartogramInfo::n_insets() const return inset_states_.size(); } -void CartogramInfo::output_to_stdout() const -{ - std::cout << stdout_json << std::endl; -} - std::map &CartogramInfo::ref_to_inset_states() { return inset_states_; From 4bf68ed6f78137506d82cfcbd3c77e7cc6de21a0 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:51:01 +0800 Subject: [PATCH 087/100] Explain meaning of processed as a comment --- src/main.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index b3cb1c0a..fc9f502d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -194,6 +194,8 @@ int main(const int argc, const char *argv[]) // Output rescaled GeoJSON cart_info.write_geojson( geo_file_name, + // processed = simplified + rescaled + // and potentially projected + small polygons removed map_name + "_input_processed", false); From 
9e1e00ebdc82c80fce404e2e57aa396a9f1de24c Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:53:49 +0800 Subject: [PATCH 088/100] Don't output to stdout by default --- include/cartogram_info.hpp | 2 +- src/main.cpp | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/include/cartogram_info.hpp b/include/cartogram_info.hpp index 6fe66bd6..901ffae8 100644 --- a/include/cartogram_info.hpp +++ b/include/cartogram_info.hpp @@ -39,7 +39,7 @@ class CartogramInfo void replace_missing_and_zero_target_areas(); std::string set_map_name(const std::string &); void shift_insets_to_target_position(); - void write_geojson(const std::string &, const std::string &, bool); + void write_geojson(const std::string &, const std::string &, bool = false); }; #endif // CARTOGRAM_INFO_HPP_ diff --git a/src/main.cpp b/src/main.cpp index fc9f502d..6dd292a0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -144,8 +144,7 @@ int main(const int argc, const char *argv[]) // Output to GeoJSON cart_info.write_geojson( geo_file_name, - map_name + "_equal_area", - false); + map_name + "_equal_area"); return EXIT_SUCCESS; } @@ -196,8 +195,7 @@ int main(const int argc, const char *argv[]) geo_file_name, // processed = simplified + rescaled // and potentially projected + small polygons removed - map_name + "_input_processed", - false); + map_name + "_input_processed"); // Set up Fourier transforms const unsigned int lx = inset_state.lx(); From 84509389d13dd531a6dc0ed5e854c93bf0d0bb6a Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 24 Oct 2024 16:26:14 +0800 Subject: [PATCH 089/100] Allow bbox values below -1 --- src/inset_state/inset_state.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/inset_state/inset_state.cpp b/src/inset_state/inset_state.cpp index 4eef1f8f..ba97f6be 100644 --- a/src/inset_state/inset_state.cpp +++ b/src/inset_state/inset_state.cpp 
@@ -419,8 +419,8 @@ void InsetState::create_and_store_quadtree_cell_corners() double rho_max = -1e9; // get the minimum rho_init of the bbox of the node - for (unsigned int i = bbox.xmin(); i < bbox.xmax(); ++i) { - for (unsigned int j = bbox.ymin(); j < bbox.ymax(); ++j) { + for (int i = bbox.xmin(); i < bbox.xmax(); ++i) { + for (int j = bbox.ymin(); j < bbox.ymax(); ++j) { if (i >= this->lx() || j >= this->ly()) { continue; } From 00ca8799b6139b3eea6c94069ce1228e80094b4d Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 24 Oct 2024 16:44:00 +0800 Subject: [PATCH 090/100] cast lx, ly to (int) to compare with bbox --- src/inset_state/inset_state.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/inset_state/inset_state.cpp b/src/inset_state/inset_state.cpp index ba97f6be..c12dbc32 100644 --- a/src/inset_state/inset_state.cpp +++ b/src/inset_state/inset_state.cpp @@ -421,10 +421,10 @@ void InsetState::create_and_store_quadtree_cell_corners() // get the minimum rho_init of the bbox of the node for (int i = bbox.xmin(); i < bbox.xmax(); ++i) { for (int j = bbox.ymin(); j < bbox.ymax(); ++j) { - if (i >= this->lx() || j >= this->ly()) { + if (i < 0 || j < 0) { continue; } - if (i < 0 || j < 0) { + if (i >= (int) this->lx() || j >= (int) this->ly()) { continue; } rho_min = std::min(rho_min, this->ref_to_rho_init()(i, j)); From 435b25d4cd13f40844836ff4386a0d9a455536cf Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 24 Oct 2024 16:56:52 +0800 Subject: [PATCH 091/100] Make cartogram --version equal to RELEASE_TAG --- .github/workflows/build.yml | 53 ++++++++++++++++++++++-------------- CMakeLists.txt | 5 ++++ src/misc/parse_arguments.cpp | 2 +- 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f20e3109..b89b4d40 100644 --- a/.github/workflows/build.yml +++ 
b/.github/workflows/build.yml @@ -10,9 +10,21 @@ env: BUILD_TYPE: Release jobs: + + set-outputs: + runs-on: ubuntu-latest + outputs: + short_sha: ${{ steps.vars.outputs.short_sha }} + steps: + + - name: Set + id: vars + run: echo "short_sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + build-and-upload: runs-on: ubuntu-latest + needs: set-outputs container: image: gocartio/cartogram-web:latest steps: @@ -29,7 +41,7 @@ jobs: fetch-depth: 0 - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DRELEASE_TAG=${{ needs.set-outputs.outputs.short_sha }} - name: Build run: | @@ -47,6 +59,7 @@ jobs: # cd tests/ # chmod +x stress_test.sh # bash stress_test.sh + - name: Upload Artifact uses: actions/upload-artifact@v4 with: @@ -56,7 +69,7 @@ jobs: release: runs-on: ubuntu-latest - needs: build-and-upload + needs: [build-and-upload, set-outputs] env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -71,34 +84,32 @@ jobs: with: name: cartogram - - name: Generate and Push Tag - id: generate_tag - run: | - # Configure git committer - git config user.email "action@github.com" - git config user.name "GitHub Action" + # - name: Generate and Push Tag + # id: generate_tag + # run: | + # # Configure git committer + # git config user.email "action@github.com" + # git config user.name "GitHub Action" - # Get the current date - current_date=$(date +'%Y-%m-%d') + # # Get the current date + # current_date=$(date +'%Y-%m-%d') - # Get the short SHA of the current HEAD - short_sha=$(git rev-parse --short HEAD) + # # Get the short SHA of the current HEAD - # Create the new tag - git tag -a "${short_sha}" -m "New release on ${current_date}" + # # Create the new tag + # git tag -a "${{ needs.set-outputs.outputs.short_sha }}" - # Push the tag to the repository - git push origin "${short_sha}" + # # Push the tag to the repository + # git push origin "${{ 
needs.set-outputs.outputs.short_sha }}" - echo "new_tag=${short_sha}" >> $GITHUB_OUTPUT - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Release uses: softprops/action-gh-release@v2 with: - tag_name: ${{ steps.generate_tag.outputs.new_tag }} + tag_name: ${{ needs.set-outputs.outputs.short_sha }} files: cartogram - body: "Rolling release ${{ steps.generate_tag.outputs.new_tag }}" + body: "Rolling release ${{ needs.set-outputs.outputs.short_sha }}" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a8767aa..d01244f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,9 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) set(CMAKE_COLOR_DIAGNOSTICS ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +# Assume development build by default +set(RELEASE_TAG "development" CACHE STRING "Release tag for the build") + # Default build type if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE) @@ -35,6 +38,8 @@ pkg_search_module(cairo REQUIRED cairo IMPORTED_TARGET) file(GLOB_RECURSE CARTOGRAM_SOURCES "src/*.cpp") add_executable(cartogram ${CARTOGRAM_SOURCES}) +target_compile_definitions(cartogram PRIVATE RELEASE_TAG="${RELEASE_TAG}") + # ========== Include Directories ========== target_include_directories(cartogram PUBLIC diff --git a/src/misc/parse_arguments.cpp b/src/misc/parse_arguments.cpp index c5854a7f..330ee75c 100644 --- a/src/misc/parse_arguments.cpp +++ b/src/misc/parse_arguments.cpp @@ -26,7 +26,7 @@ argparse::ArgumentParser parsed_arguments( { // Create parser for arguments using argparse. 
// From https://github.com/p-ranav/argparse - argparse::ArgumentParser arguments("./cartogram", "2.0"); + argparse::ArgumentParser arguments("./cartogram", RELEASE_TAG); // Positional argument accepting geometry file (GeoJSON, JSON) as input arguments.add_argument("geometry_file") From fea5c1b28fc3523e069d718482d09c8a2170ca85 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:11:07 +0800 Subject: [PATCH 092/100] Push tag before releasing --- .github/workflows/build.yml | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b89b4d40..d738f356 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,10 @@ jobs: short_sha: ${{ steps.vars.outputs.short_sha }} steps: - - name: Set + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Calculate short_sha id: vars run: echo "short_sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT @@ -84,26 +87,10 @@ jobs: with: name: cartogram - # - name: Generate and Push Tag - # id: generate_tag - # run: | - # # Configure git committer - # git config user.email "action@github.com" - # git config user.name "GitHub Action" - - # # Get the current date - # current_date=$(date +'%Y-%m-%d') - - # # Get the short SHA of the current HEAD - - # # Create the new tag - # git tag -a "${{ needs.set-outputs.outputs.short_sha }}" - - # # Push the tag to the repository - # git push origin "${{ needs.set-outputs.outputs.short_sha }}" - - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Push short_sha Tag + uses: actions-ecosystem/action-push-tag@v1 + with: + tag: ${{ needs.set-outputs.outputs.short_sha }} - name: Release uses: softprops/action-gh-release@v2 From bd7f3c9cf9faa21570afa0b634b881fbd0588647 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:20:45 +0800 
Subject: [PATCH 093/100] Use github-script to push tag, add -j4 --- .github/workflows/build.yml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d738f356..c32fb9a1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,7 +48,7 @@ jobs: - name: Build run: | - cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --target install -j$(nproc) -- + cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --target install -j4 -- # - name: Run CTest # working-directory: ${{github.workspace}}/build @@ -87,16 +87,21 @@ jobs: with: name: cartogram - - name: Push short_sha Tag - uses: actions-ecosystem/action-push-tag@v1 + - name: Push tag + uses: actions/github-script@v5 with: - tag: ${{ needs.set-outputs.outputs.short_sha }} + script: | + github.rest.git.createRef({ + owner: context.repo.owner, + repo: context.repo.repo, + ref: 'refs/tags/${{ needs.set-outputs.outputs.short_sha }}', + sha: context.sha + }) - name: Release uses: softprops/action-gh-release@v2 with: tag_name: ${{ needs.set-outputs.outputs.short_sha }} files: cartogram - body: "Rolling release ${{ needs.set-outputs.outputs.short_sha }}" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file From d8f06752cf34d93ac9be808475abf8454eeb874d Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:31:36 +0800 Subject: [PATCH 094/100] Remove github script to push tag (warning msg) --- .github/workflows/build.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c32fb9a1..38204cd6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -91,12 +91,10 @@ jobs: uses: actions/github-script@v5 with: script: | - github.rest.git.createRef({ - owner: context.repo.owner, - repo: 
context.repo.repo, - ref: 'refs/tags/${{ needs.set-outputs.outputs.short_sha }}', - sha: context.sha - }) + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git tag ${{ needs.set-outputs.outputs.short_sha }} + git push origin ${{ needs.set-outputs.outputs.short_sha }} - name: Release uses: softprops/action-gh-release@v2 From 623487fde121f4f8a098a2864715b35df65cf7b3 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:44:48 +0800 Subject: [PATCH 095/100] Remove `uses: actions/github-script@v5` --- .github/workflows/build.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 38204cd6..5685fc51 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -88,13 +88,11 @@ jobs: name: cartogram - name: Push tag - uses: actions/github-script@v5 - with: - script: | - git config --global user.name "github-actions[bot]" - git config --global user.email "github-actions[bot]@users.noreply.github.com" - git tag ${{ needs.set-outputs.outputs.short_sha }} - git push origin ${{ needs.set-outputs.outputs.short_sha }} + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git tag ${{ needs.set-outputs.outputs.short_sha }} + git push origin ${{ needs.set-outputs.outputs.short_sha }} - name: Release uses: softprops/action-gh-release@v2 From e3e0bacafe4f598db3f089c18ce094e8387e49c1 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 6 Nov 2024 16:31:24 +0800 Subject: [PATCH 096/100] Moved deployment to go-cart-io/cartogram-docker --- .github/workflows/deploy.yml | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 .github/workflows/deploy.yml diff --git a/.github/workflows/deploy.yml 
b/.github/workflows/deploy.yml deleted file mode 100644 index 691d1e39..00000000 --- a/.github/workflows/deploy.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: Depoy (to go-cart.io) - -on: - # only when triggered manually, after we have tested it ourselves - workflow_dispatch: - -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - name: Deploy binary to go-cart.io - uses: appleboy/ssh-action@v1.0.3 - with: - host: ${{ secrets.DEPLOY_HOST }} - username: ${{ secrets.DEPLOY_USER }} - key: ${{ secrets.DEPLOY_SSH_KEY }} - script: /home/cartogram/deploy-cartogram-cpp.sh \ No newline at end of file From bb537f8c561e7701eb35332dcbc93ebe7b1dba57 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 6 Nov 2024 18:01:10 +0800 Subject: [PATCH 097/100] Add troubleshooting step about CGAL submodule --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 63ec9247..43ccce76 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ Using lesser cores than you have is recommended so that your computer still has - If running `cmake -B build` gives you an error, it is likely that a dependency was not installed correctly. Rerun the appropriate commands above to install the required dependencies and try again. - If you get an error which mentions permission issues, try running the command that gave you the error with `sudo` prefixed, as done with `sudo make install -C build` above. - If `cmake` complains that it could not find a particular library, please try uninstalling it and installing it again. After reinstalling it, please also unlink it and link it with the `--force` flag. +- If you get errors related to CGAL, it's likely you have another version of CGAL installed on your computer that is getting chosen instead of the one contained as a submodule within this repository. It's also possible that when cloning this repository, the `--recurse-submodule` flag was missing. 
Try running `git submodule init` and `git submodule update` in the root directory of the repository. ### Usage From 963e880b84287c389dc7f6eb4ba5391ba2b6086b Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 6 Nov 2024 18:03:14 +0800 Subject: [PATCH 098/100] write_csv(): write current cartogram state as csv --- include/cartogram_info.hpp | 1 + src/cartogram_info/cartogram_info.cpp | 34 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/include/cartogram_info.hpp b/include/cartogram_info.hpp index 901ffae8..ccb9389c 100644 --- a/include/cartogram_info.hpp +++ b/include/cartogram_info.hpp @@ -39,6 +39,7 @@ class CartogramInfo void replace_missing_and_zero_target_areas(); std::string set_map_name(const std::string &); void shift_insets_to_target_position(); + void write_csv(const std::string &csv_file_name); void write_geojson(const std::string &, const std::string &, bool = false); }; diff --git a/src/cartogram_info/cartogram_info.cpp b/src/cartogram_info/cartogram_info.cpp index 48ffa5a8..c439b90d 100644 --- a/src/cartogram_info/cartogram_info.cpp +++ b/src/cartogram_info/cartogram_info.cpp @@ -1,5 +1,6 @@ #include "cartogram_info.hpp" #include "constants.hpp" +#include "csv.hpp" #include #include @@ -204,3 +205,36 @@ std::string CartogramInfo::set_map_name(const std::string &map_name) } return map_name_; } + +void CartogramInfo::write_csv(const std::string &csv_file_name) { + // Write a csv file with the current target areas + std::ofstream out_file_csv; + out_file_csv.open(csv_file_name + ".csv"); + if (!out_file_csv) { + std::cerr + << "ERROR writing GeoJSON: failed to open " << csv_file_name << ".csv" + << std::endl; + } + + // Each vector of strings will represent one row, starting with column names + std::vector > csv_rows(1); + + csv_rows[0].push_back(id_header_); + csv_rows[0].push_back("Target Area"); + + // Fill up the rows with the IDs and target areas + for (const auto &[id, inset_pos] 
: gd_to_inset_) { + const auto &inset_state = inset_states_.at(inset_pos); + const auto target_area = inset_state.target_area_at(id); + csv_rows.push_back({id, std::to_string(target_area)}); + } + + // Write to CSV object + auto writer = csv::make_csv_writer(out_file_csv); + for (const auto &row : csv_rows) { + writer << row; + } + + // Close out_file and exit + out_file_csv.close(); +} \ No newline at end of file From c489c3ef9e004f53c3d036bf2f7a673ae206d8f0 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Wed, 6 Nov 2024 18:04:02 +0800 Subject: [PATCH 099/100] Put outputting processed input behind flag --- include/parse_arguments.hpp | 3 ++- src/main.cpp | 27 ++++++++++++++++----------- src/misc/parse_arguments.cpp | 8 +++++++- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/parse_arguments.hpp b/include/parse_arguments.hpp index 305d7976..932cc1b6 100644 --- a/include/parse_arguments.hpp +++ b/include/parse_arguments.hpp @@ -25,6 +25,7 @@ argparse::ArgumentParser parsed_arguments( bool &remove_tiny_polygons, double &minimum_polygon_area, bool &plot_quadtree, - bool &rays); + bool &rays, + bool &output_preprocessed); #endif // PARSE_ARGUMENTS_HPP_ diff --git a/src/main.cpp b/src/main.cpp index 6dd292a0..ca32acff 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -32,7 +32,8 @@ int main(const int argc, const char *argv[]) // Other boolean values that are needed to parse the command line arguments bool make_csv, output_equal_area, output_to_stdout, plot_density, plot_grid, - plot_intersections, plot_polygons, plot_quadtree, remove_tiny_polygons; + plot_intersections, plot_polygons, plot_quadtree, remove_tiny_polygons, + output_preprocessed; // If the proportion of the polygon area is smaller than // min_polygon_area * total area, then remove polygon @@ -61,7 +62,8 @@ int main(const int argc, const char *argv[]) remove_tiny_polygons, min_polygon_area, plot_quadtree, - rays); + rays, + 
output_preprocessed); // Initialize cart_info. It contains all the information about the cartogram // that needs to be handled by functions called from main(). @@ -142,9 +144,7 @@ int main(const int argc, const char *argv[]) cart_info.shift_insets_to_target_position(); // Output to GeoJSON - cart_info.write_geojson( - geo_file_name, - map_name + "_equal_area"); + cart_info.write_geojson(geo_file_name, map_name + "_equal_area"); return EXIT_SUCCESS; } @@ -190,12 +190,17 @@ int main(const int argc, const char *argv[]) } std::cerr << "End of initial simplification of " << inset_pos << std::endl; - // Output rescaled GeoJSON - cart_info.write_geojson( - geo_file_name, - // processed = simplified + rescaled - // and potentially projected + small polygons removed - map_name + "_input_processed"); + if (output_preprocessed) { + // Output rescaled GeoJSON + cart_info.write_geojson( + geo_file_name, + // processed = simplified + rescaled + // and potentially projected + small polygons removed + map_name + "_input_processed"); + + // Output preprocessed CSV file + cart_info.write_csv(map_name + "_input_processed"); + } // Set up Fourier transforms const unsigned int lx = inset_state.lx(); diff --git a/src/misc/parse_arguments.cpp b/src/misc/parse_arguments.cpp index 330ee75c..1da6ef8f 100644 --- a/src/misc/parse_arguments.cpp +++ b/src/misc/parse_arguments.cpp @@ -22,7 +22,8 @@ argparse::ArgumentParser parsed_arguments( bool &remove_tiny_polygons, double &minimum_polygon_area, bool &plot_quadtree, - bool &rays) + bool &rays, + bool &output_preprocessed) { // Create parser for arguments using argparse. 
// From https://github.com/p-ranav/argparse @@ -115,6 +116,10 @@ argparse::ArgumentParser parsed_arguments( .help("Boolean: Use old ray shooting method to fill density") .default_value(false) .implicit_value(true); + arguments.add_argument("--output_preprocessed") + .help("Boolean: output input GeoJSON and CSV after preprocessing") + .default_value(false) + .implicit_value(true); // Arguments of column names in provided visual variables file (CSV) std::string pre = "String: Column name for "; @@ -174,6 +179,7 @@ argparse::ArgumentParser parsed_arguments( make_csv = arguments.get<bool>("-M"); output_equal_area = arguments.get<bool>("-E"); output_to_stdout = arguments.get<bool>("-O"); + output_preprocessed = arguments.get<bool>("--output_preprocessed"); plot_density = arguments.get<bool>("-d"); plot_grid = arguments.get<bool>("-g"); plot_intersections = arguments.get<bool>("-i"); From 93770651ebcfae8386c82f123bb4bd9727876e99 Mon Sep 17 00:00:00 2001 From: adisidev <64905594+adisidev@users.noreply.github.com> Date: Thu, 7 Nov 2024 18:22:06 +0800 Subject: [PATCH 100/100] Make changes suggested by @nihalzp --- .github/workflows/build.yml | 2 +- README.md | 4 +--- apt-requirements.txt | 15 ------------ src/cartogram_info/cartogram_info.cpp | 34 +++++++++++++-------------- 4 files changed, 19 insertions(+), 36 deletions(-) delete mode 100644 apt-requirements.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5685fc51..2c95785f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,7 +4,7 @@ on: # when a push is made to the main branch (like when a pull request is merged, or something is pushed directly) workflow_dispatch: push: - branches: [ "main", "pr/adisidev/201" ] + branches: [ "main" ] env: BUILD_TYPE: Release diff --git a/README.md b/README.md index 43ccce76..f8238621 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,7 @@ Install pkg-config, boost, fftw, nlohmann-json, and cmake by running the followi Have a look through to apt-requirements.txt if 
you'd like to see what all will be installed. Then, run the following commands to install all dependencies through apt: - sudo apt update && add-apt-repository ppa:ubuntu-toolchain-r/test - cat apt-requirements.txt | xargs sudo apt install -y - + apt install -y g++-11 build-essential cmake libboost-all-dev nlohmann-json3-dev libomp-dev libfftw3-dev libcairo2-dev ### Installation diff --git a/apt-requirements.txt b/apt-requirements.txt deleted file mode 100644 index 40364bd2..00000000 --- a/apt-requirements.txt +++ /dev/null @@ -1,15 +0,0 @@ -git -gcc-11 -g++-11 -build-essential -manpages-dev -software-properties-common -nlohmann-json3-dev -libomp-dev -libfftw3-dev -libcairo2-dev -libboost-all-dev -cmake -libgmp3-dev -libmpfr-dev -libeigen3-dev \ No newline at end of file diff --git a/src/cartogram_info/cartogram_info.cpp b/src/cartogram_info/cartogram_info.cpp index c439b90d..5d9630d3 100644 --- a/src/cartogram_info/cartogram_info.cpp +++ b/src/cartogram_info/cartogram_info.cpp @@ -208,26 +208,26 @@ std::string CartogramInfo::set_map_name(const std::string &map_name) void CartogramInfo::write_csv(const std::string &csv_file_name) { // Write a csv file with the current target areas - std::ofstream out_file_csv; - out_file_csv.open(csv_file_name + ".csv"); - if (!out_file_csv) { - std::cerr - << "ERROR writing GeoJSON: failed to open " << csv_file_name << ".csv" - << std::endl; - } + std::ofstream out_file_csv; + out_file_csv.open(csv_file_name + ".csv"); + if (!out_file_csv) { + std::cerr + << "ERROR writing CSV: failed to open " << csv_file_name << ".csv" + << std::endl; + } - // Each vector of strings will represent one row, starting with column names - std::vector<std::vector<std::string> > csv_rows(1); + // Each vector of strings will represent one row, starting with column names + std::vector<std::vector<std::string> > csv_rows(1); - csv_rows[0].push_back(id_header_); - csv_rows[0].push_back("Target Area"); + csv_rows[0].push_back(id_header_); + csv_rows[0].push_back("Target Area"); - // Fill up the rows with 
the IDs and target areas - for (const auto &[id, inset_pos] : gd_to_inset_) { - const auto &inset_state = inset_states_.at(inset_pos); - const auto target_area = inset_state.target_area_at(id); - csv_rows.push_back({id, std::to_string(target_area)}); - } + // Fill up the rows with the IDs and target areas + for (const auto &[id, inset_pos] : gd_to_inset_) { + const auto &inset_state = inset_states_.at(inset_pos); + const auto target_area = inset_state.target_area_at(id); + csv_rows.push_back({id, std::to_string(target_area)}); + } // Write to CSV object auto writer = csv::make_csv_writer(out_file_csv);