From 350bd8d94176d70ca86df7109846f9231105b906 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Sun, 19 Feb 2023 10:55:33 +0200 Subject: [PATCH] New name base91x and prepared for PYPI release --- .github/workflows/main.yml | 222 ++++++++++++---------- CMakeLists.txt | 23 ++- README.md | 9 +- base91.hpp => include/base91x.hpp | 32 ++-- perf_base91.py => perf/__main__.py | 33 +++- perf_base91.cpp => perf/perf_base91x.cpp | 31 ++- pyproject.toml | 27 +++ requirements.txt | 8 + setup.py | 3 + base91.cpp => src/base91x.cpp | 31 ++- src/base91x/__init__.py | 4 + src/base91x/__main__.py | 63 ++++++ base91.py => src/base91x/base91x.py | 90 +++------ test.sh | 24 --- test_base91.py | 61 ------ tests/__init__.py | 0 test_base91.cpp => tests/test_base91x.cpp | 65 +++++-- tests/test_base91x.py | 84 ++++++++ 18 files changed, 494 insertions(+), 316 deletions(-) rename base91.hpp => include/base91x.hpp (89%) rename perf_base91.py => perf/__main__.py (57%) rename perf_base91.cpp => perf/perf_base91x.cpp (67%) create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 setup.py rename base91.cpp => src/base91x.cpp (57%) create mode 100644 src/base91x/__init__.py create mode 100644 src/base91x/__main__.py rename base91.py => src/base91x/base91x.py (59%) delete mode 100644 test.sh delete mode 100644 test_base91.py create mode 100644 tests/__init__.py rename test_base91.cpp => tests/test_base91x.cpp (73%) create mode 100644 tests/test_base91x.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0690a16..d2c234a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,11 +6,10 @@ on: pull_request: branches: [ main ] release: - types: [created] + types: [ released ] jobs: -# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # test: @@ -18,107 +17,120 @@ jobs: steps: - - name: Dump GitHub context - env: - GITHUB_CONTEXT: ${{ toJSON(github) }} - run: echo "$GITHUB_CONTEXT" - - - name: 
Checkout - uses: actions/checkout@v2 - with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.sha }} - - # c++ - - - name: CMake - run: cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_FLAGS=" -O " . - - - name: Make - run: make all - - - name: C++ Unit Tests - run: make test - - - name: C++ Performance Tests - run: ./perf_base91 - - # python: - - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - - name: Install pytest - run: pip install pytest pytest-random-order pytest-repeat - - - name: Python Unit Tests - run: pytest --count=100 --random-order-bucket=global - - - name: Python Performance Test - run: python perf_base91.py - - # cross tests - - - name: Make random bytes - run: dd if=/dev/urandom of=orig.dat bs=1 count=$RANDOM - - - name: Encode with python - run: python base91.py -e orig.dat py.txt - - - name: Encode with cpp - run: ./base91 -e orig.dat cpp.txt - - - name: Compare text - run: diff py.txt cpp.txt - - - name: Decode with cpp - run: ./base91 -d py.txt cpp.dat - - - name: Decode with python - run: python base91.py -d cpp.txt py.dat - - - name: Compare refurbish data - run: | - HASH_ORIG=$(sha256sum orig.dat | cut -c -64) - echo "HASH_ORIG='${HASH_ORIG}'" - HASH_CPP=$(sha256sum cpp.dat | cut -c -64) - echo "HASH_CPP='${HASH_CPP}'" - HASH_PY=$(sha256sum py.dat | cut -c -64) - echo "HASH_PY='${HASH_PY}'" - if [ -n "${HASH_ORIG}" ] && [ "${HASH_ORIG}" == "${HASH_CPP}" ] && [ "${HASH_ORIG}" == "${HASH_PY}" ]; then - echo "OK" - else - echo "FAIL" - exit 1 - fi - - - name: Trash decoding with python - run: python base91.py -d orig.dat trash_py.dat - - - name: Trash decoding with cpp - run: ./base91 -d orig.dat trash_cpp.dat - - - name: Compare trash data - run: | - TRASH_HASH_CPP=$(sha256sum trash_cpp.dat | cut -c -64) - echo "TRASH_HASH_CPP='${TRASH_HASH_CPP}'" - TRASH_HASH_PY=$(sha256sum trash_py.dat | cut -c -64) - echo "TRASH_HASH_PY='${TRASH_HASH_PY}'" - if [ -n "${TRASH_HASH_CPP}" ] && [ 
"${TRASH_HASH_CPP}" == "${TRASH_HASH_PY}" ]; then - echo "OK" - else - echo "FAIL" - exit 1 - fi - - - name: Failure data upload - if: ${{ failure() }} - uses: actions/upload-artifact@v3 - with: - name: crash_data - path: ./*.dat - -# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + - name: Checkout + uses: actions/checkout@v2 + with: + fetch-depth: 0 + ref: ${{ github.event.pull_request.head.sha }} + # # # # c++ + + - name: CMake + run: cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_FLAGS=" -O " . + + - name: Make + run: make all + + - name: C++ Unit Tests + run: make test + + - name: C++ Performance Tests + run: make perf + + # # # # python: + + - name: Set up latest Python + uses: actions/setup-python@v3 + with: + python-version: 3.x + + - name: Update PIP + run: python -m pip install --upgrade pip + + - name: Install requirements + run: pip install --requirement requirements.txt + + - name: Update PIP + run: python -m pip install --upgrade pip + + - name: Python Unit Tests + run: pytest --count=10 --random-order-bucket=global + + - name: Python Performance Test + run: python -m perf + + - name: Build package + run: python -m build + + - name: Install package + run: python -m pip install $(find dist -type f -wholename "*.whl") + + # # # # cross tests + + - name: Make random bytes + run: dd if=/dev/urandom of=orig.dat bs=1 count=$RANDOM + + - name: Encode with python + run: python -m base91x -e orig.dat py.txt + + - name: Encode with cpp + run: ./base91x -e orig.dat cpp.txt + + - name: Compare text + run: diff py.txt cpp.txt + + - name: Decode with cpp + run: ./base91x -d py.txt cpp.dat + + - name: Decode with python + run: python -m base91x -d cpp.txt py.dat + + - name: Compare refurbish data + run: | + HASH_ORIG=$(sha256sum orig.dat | cut -c -64) + echo "HASH_ORIG='${HASH_ORIG}'" + HASH_CPP=$(sha256sum cpp.dat | cut -c -64) + echo "HASH_CPP='${HASH_CPP}'" + HASH_PY=$(sha256sum py.dat | cut -c -64) + echo "HASH_PY='${HASH_PY}'" + if 
[ -n "${HASH_ORIG}" ] && [ "${HASH_ORIG}" == "${HASH_CPP}" ] && [ "${HASH_ORIG}" == "${HASH_PY}" ]; then + echo "OK" + else + echo "FAIL" + exit 1 + fi + + - name: Trash decoding with python + run: python -m base91x -d orig.dat trash_py.dat + + - name: Trash decoding with cpp + run: ./base91x -d orig.dat trash_cpp.dat + + - name: Compare trash data + run: | + TRASH_HASH_CPP=$(sha256sum trash_cpp.dat | cut -c -64) + echo "TRASH_HASH_CPP='${TRASH_HASH_CPP}'" + TRASH_HASH_PY=$(sha256sum trash_py.dat | cut -c -64) + echo "TRASH_HASH_PY='${TRASH_HASH_PY}'" + if [ -n "${TRASH_HASH_CPP}" ] && [ "${TRASH_HASH_CPP}" == "${TRASH_HASH_PY}" ]; then + echo "OK" + else + echo "FAIL" + exit 1 + fi + + - name: Failure data upload + if: ${{ failure() }} + uses: actions/upload-artifact@v3 + with: + name: crash_data + path: ./*.dat + + # # # # deploy if release + + - name: Publish + if: ${{ 'release' == github.event_name }} + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a66d9f..3cc1ba9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,17 +1,24 @@ cmake_minimum_required(VERSION 3.10) -project(base91) +project(base91x) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -pedantic -Wall -Wextra ") -add_executable(base91 base91.cpp) +# includes +INCLUDE_DIRECTORIES(./include/) -add_executable(perf_base91 perf_base91.cpp perf_base91.cpp) - -add_executable(test_base91 test_base91.cpp test_base91.cpp) - -add_test(NAME test_base91 COMMAND test_base91) +# main executable +add_executable(${PROJECT_NAME} src/base91x.cpp) +# unit tests +set(PROJECT_TEST_NAME test_${PROJECT_NAME}) +add_executable(${PROJECT_TEST_NAME} tests/test_base91x.cpp) +add_test(NAME ${PROJECT_TEST_NAME} COMMAND ${PROJECT_TEST_NAME}) enable_testing() -install(TARGETS base91) +# performance test - use make perf +set(PROJECT_PERF_NAME perf_${PROJECT_NAME}) +add_executable(${PROJECT_PERF_NAME} 
perf/perf_base91x.cpp) +ADD_CUSTOM_TARGET(perf COMMAND ${PROJECT_PERF_NAME}) + +install(TARGETS ${PROJECT_NAME}) diff --git a/README.md b/README.md index 3025a5f..972d05b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # base91x -This base91 method provides data encoding and decoding +This base91x method provides data encoding and decoding using numeric system of base 91 with specific alphabet that does not require escaping any symbols in C, C++ (and many other languages?) string. 'x' - means the alphabet was obtained with XOR function. @@ -22,13 +22,14 @@ There is possibility to extend the algorithm to use 89 codes during decode. The alphabet transforms from base91 value with operation XOR(0x7F) with the tree exceptions. -The alphabet: +The alphabet in the order: ``` !~}|{zyxwvutsrqponmlkjihgfedcba`_^]#[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)($&% ``` -PAY ATTENTION: -Encoded string may have unpleased sequence /* or */ + +### PAY ATTENTION: +Encoded string may have an awkward sequence ``/*`` or ``*/`` It may hurt C or C++ code when the string is placed into code. But sequence %%% should not appear. So, encoded string might be placed with raw string literal: diff --git a/base91.hpp b/include/base91x.hpp similarity index 89% rename from base91.hpp rename to include/base91x.hpp index 77f07cd..de60ea9 100644 --- a/base91.hpp +++ b/include/base91x.hpp @@ -35,7 +35,7 @@ SOFTWARE. //------------------------------------------------------------------------------ /** - * Class base91 provides encoding and decoding statics methods + * Class base91x provides encoding and decoding static methods * using numeric system of base 91 with specific alphabet that does not require * escaping any symbols in C, C++ string. * The alphabet contains printable characters of ASCII except: @@ -48,16 +48,16 @@ SOFTWARE. * There is possibility to extend the algorithm to use 89 codes during decode.
*/ -class base91 +class base91x { public: /** Base of the numeric system is 91dec equals ASCII symbol [ */ - static const char BASE91_LEN = 91; + static const char BASE91X_LEN = 91; /** Bits in one byte. Should be 8 */ static const unsigned char_bit = CHAR_BIT; - /** Pair of base91 symbols might code 13 bits */ + /** Pair of base91x symbols might code 13 bits */ static const unsigned b91word_bit = 13; /** 8192 possibly values for 13 bits */ @@ -72,15 +72,15 @@ class base91 /** Mask for reverse alphabet */ static constexpr unsigned zyx_mask = ZYX_LEN - 1; - /** BASE91 JSON OPTIMIZED ALPHABET: */ - static constexpr unsigned char BASE91_ALPHABET[BASE91_LEN] = {'!', '~', '}', '|', '{', 'z', 'y', 'x', 'w', 'v', + /** base91x JSON OPTIMIZED ALPHABET: */ + static constexpr unsigned char BASE91X_ALPHABET[BASE91X_LEN] = {'!', '~', '}', '|', '{', 'z', 'y', 'x', 'w', 'v', 'u', 't', 's', 'r', 'q', 'p', 'o', 'n', 'm', 'l', 'k', 'j', 'i', 'h', 'g', 'f', 'e', 'd', 'c', 'b', 'a', '`', '_', '^', ']', '#', '[', 'Z', 'Y', 'X', 'W', 'V', 'U', 'T', 'S', 'R', 'Q', 'P', 'O', 'N', 'M', 'L', 'K', 'J', 'I', 'H', 'G', 'F', 'E', 'D', 'C', 'B', 'A', '@', '?', '>', '=', '<', ';', ':', '9', '8', '7', '6', '5', '4', '3', '2', '1', '0', '/', '.', '-', ',', '+', '*', ')', '(', '$', '&', '%'}; - /** BASE91 reverse table for quick decoding */ - static constexpr char BASE91_ZYX[ZYX_LEN] + /** base91x reverse table for quick decoding */ + static constexpr char BASE91X_ZYX[ZYX_LEN] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, 35, 88, 90, 89, -1, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, @@ -142,9 +142,9 @@ class base91 bit_collected += char_bit; while (b91word_bit <= bit_collected) { - div_t d = std::div(b91word_mask & collector, BASE91_LEN); - text.push_back(BASE91_ALPHABET[d.rem]); - 
text.push_back(BASE91_ALPHABET[d.quot]); + div_t d = std::div(b91word_mask & collector, BASE91X_LEN); + text.push_back(BASE91X_ALPHABET[d.rem]); + text.push_back(BASE91X_ALPHABET[d.quot]); collector >>= b91word_bit; bit_collected -= b91word_bit; } @@ -152,11 +152,11 @@ class base91 if (0 != bit_collected) { - const div_t d = std::div(b91word_mask & collector, BASE91_LEN); - text.push_back(BASE91_ALPHABET[d.rem]); + const div_t d = std::div(b91word_mask & collector, BASE91X_LEN); + text.push_back(BASE91X_ALPHABET[d.rem]); if (7 <= bit_collected) { - text.push_back(BASE91_ALPHABET[d.quot]); + text.push_back(BASE91X_ALPHABET[d.quot]); } } } @@ -184,7 +184,7 @@ class base91 { continue; } - const char digit = BASE91_ZYX[zyx_mask & i]; + const char digit = BASE91X_ZYX[zyx_mask & i]; if (-1 == digit) { continue; @@ -195,7 +195,7 @@ class base91 continue; } - collector |= (BASE91_LEN * digit + lower) << bit_collected; + collector |= (BASE91X_LEN * digit + lower) << bit_collected; bit_collected += b91word_bit; lower = -1; diff --git a/perf_base91.py b/perf/__main__.py similarity index 57% rename from perf_base91.py rename to perf/__main__.py index 3a95bbc..1b3855f 100644 --- a/perf_base91.py +++ b/perf/__main__.py @@ -1,10 +1,33 @@ +""" +MIT License + +Copyright (c) 2023 Roman Babenko + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" import math import random import sys import time from typing import List -import base91 +from src.base91x import base91x LEN = 100 SIZE = 1 << 18 @@ -24,9 +47,9 @@ def perf_test() -> int: for n in range(LEN): data = random.randbytes(SIZE) start_time = time.time() - text = base91.encode(data) + text = base91x.encode(data) encoding_stop_time = time.time() - refurbish_data = base91.decode(text) + refurbish_data = base91x.decode(text) decoding_stop_time = time.time() assert data == refurbish_data encoding_elapsed_time = encoding_stop_time - start_time @@ -60,7 +83,5 @@ def main() -> int: return 0 -assert 3 == sys.version_info.major and 9 <= sys.version_info.minor - -if "__main__" == __name__: +if __name__ == "__main__": sys.exit(main()) diff --git a/perf_base91.cpp b/perf/perf_base91x.cpp similarity index 67% rename from perf_base91.cpp rename to perf/perf_base91x.cpp index 26f2988..b255762 100644 --- a/perf_base91.cpp +++ b/perf/perf_base91x.cpp @@ -1,3 +1,26 @@ +/* +MIT License + +Copyright (c) 2023 Roman Babenko + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice 
shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ #include #include #include @@ -5,7 +28,7 @@ #include #include -#include "base91.hpp" +#include "base91x.hpp" #define LOG std::cerr << "L_" << __LINE__ << ": " @@ -29,7 +52,7 @@ void time_stat_encode(const size_t size) for (unsigned n = 0; n < LEN; ++n) { const std::chrono::system_clock::time_point startTime = std::chrono::system_clock::now(); - base91::encode(data, text); + base91x::encode(data, text); stat[n] = 0.000000001 * static_cast(std::chrono::nanoseconds(std::chrono::system_clock::now() - startTime).count()); sum += stat[n]; @@ -54,7 +77,7 @@ void time_stat_decode(const size_t size) data.resize(size); std::generate(data.begin(), data.end(), std::rand); std::string text; - base91::encode(data, text); + base91x::encode(data, text); long double stat[LEN] = {0}; long double avg = 0; @@ -65,7 +88,7 @@ void time_stat_decode(const size_t size) for (unsigned n = 0; n < LEN; ++n) { const std::chrono::system_clock::time_point startTime = std::chrono::system_clock::now(); - base91::decode(text, data); + base91x::decode(text, data); stat[n] = 0.000000001 * static_cast(std::chrono::nanoseconds(std::chrono::system_clock::now() - startTime).count()); sum += stat[n]; diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2f5763a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,27 @@ +[project] +name = "base91x" +version = "1.0.0" +description = "base91x encoding/decoding library" +authors = [{ name = "Roman 
Babenko", email = "babenek@users.noreply.github.com" }] +readme = "README.md" +requires-python = ">=3.8" +keywords = ["encode", "decode", "base91x"] +classifiers = [ + "Programming Language :: Python :: 3", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Topic :: Software Development :: Libraries :: Python Modules" +] + +[project.urls] +"Homepage" = "https://github.com/babenek/base91" +"Bug Tracker" = "https://github.com/babenek/base91/issues" + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] +include = ["*"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..03b957f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +# build package +build +setuptools + +# testing +pytest +pytest-random-order +pytest-repeat diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..8c7fad0 --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +import setuptools + +setuptools.setup(entry_points={"console_scripts": ["base91x=base91x.__main__:main"]}) diff --git a/base91.cpp b/src/base91x.cpp similarity index 57% rename from base91.cpp rename to src/base91x.cpp index 71a4ee0..f477b93 100644 --- a/base91.cpp +++ b/src/base91x.cpp @@ -1,8 +1,31 @@ +/* +MIT License + +Copyright (c) 2023 Roman Babenko + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ #include #include #include -#include "base91.hpp" +#include "base91x.hpp" //------------------------------------------------------------------------------ @@ -44,7 +67,7 @@ int main(const int argc, const char *argv[]) { if (argc != 4 or not(0 == strncmp("-e", argv[1], 3) or 0 == strncmp("-d", argv[1], 3))) { - std::cerr << "base91 -e|-d "; + std::cerr << "base91x -e|-d "; return -1; } @@ -54,7 +77,7 @@ int main(const int argc, const char *argv[]) std::string out; readFile(argv[2], in); - base91::encode(in, out); + base91x::encode(in, out); writeFile(argv[3], out); } else @@ -63,7 +86,7 @@ int main(const int argc, const char *argv[]) std::vector out; readFile(argv[2], in); - base91::decode(in, out); + base91x::decode(in, out); writeFile(argv[3], out); } diff --git a/src/base91x/__init__.py b/src/base91x/__init__.py new file mode 100644 index 0000000..5df7288 --- /dev/null +++ b/src/base91x/__init__.py @@ -0,0 +1,4 @@ +from .base91x import BASE91X_ALPHABET +from .base91x import BASE91X_LEN +from .base91x import decode +from .base91x import encode diff --git a/src/base91x/__main__.py b/src/base91x/__main__.py new file mode 100644 index 0000000..4964db4 --- /dev/null +++ b/src/base91x/__main__.py @@ -0,0 +1,63 @@ +"""Base91x encoder-decoder +MIT License + +Copyright (c) 2023 Roman Babenko + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without 
limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import sys + +from base91x import base91x + + +# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + +def main() -> int: + """main function + + Args: + sys.argv[1]: -e|-d + sys.argv[2]: input file + sys.argv[3]: output file + Return: + EXIT_SUCCESS if no exception was thrown + """ + result = 1 + if 4 == len(sys.argv): + if '-e' == sys.argv[1]: + with open(sys.argv[2], 'rb') as in_file: + with open(sys.argv[3], 'wt', encoding='ascii') as out_file: + out_file.write(base91x.encode(in_file.read())) + result = 0 + elif '-d' == sys.argv[1]: + with open(sys.argv[2], 'rt', encoding='ascii', errors='ignore') as in_file: + with open(sys.argv[3], 'wb') as out_file: + out_file.write(base91x.decode(in_file.read())) + result = 0 + + if 0 != result: + print("base91x -e|-d ") + return result + + +# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/base91.py b/src/base91x/base91x.py similarity index 59% rename from base91.py rename to src/base91x/base91x.py index 687ddf8..434cacf 100644 --- a/base91.py +++ b/src/base91x/base91x.py @@ -1,4 +1,4 @@ 
-"""Base91 encoder-decoder +"""Base91x encoder-decoder MIT License Copyright (c) 2022 Roman Babenko @@ -22,28 +22,26 @@ SOFTWARE. """ -import sys - -# Base91 BASE91_ALPHABET in order -BASE91_ALPHABET = "!~}|{zyxwvutsrqponmlkjihgfedcba`_^]#[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)($&%" +# BASE91X_ALPHABET in order +BASE91X_ALPHABET = "!~}|{zyxwvutsrqponmlkjihgfedcba`_^]#[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)($&%" # Base of the numeric system is 91 dec equals ASCII symbol [ -BASE91_LEN = 91 +BASE91X_LEN = 91 # Bits in one byte. Should be 8 CHAR_BIT = 8 -# Pair of base91 symbols might code 13 bits -BASE91_WORD_BIT = 13 +# Pair of base91x symbols might code 13 bits +BASE91X_WORD_BIT = 13 # 8192 possibly values for 13 bits -BASE91_WORD_SIZE = 0x2000 +BASE91X_WORD_SIZE = 0x2000 # Mask for 13 bits -BASE91_WORD_MASK = 0x1FFF +BASE91X_WORD_MASK = 0x1FFF -# BASE91 reverse table for quick decoding -BASE91_ZYX = [ +# BASE91X reverse table for quick decoding +BASE91X_ZYX = [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, # -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, # -1, 0, -1, 35, 88, 90, 89, -1, 87, 86, 85, 84, 83, 82, 81, 80, # @@ -53,7 +51,7 @@ 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, # 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, -1] -ZYX_LEN = len(BASE91_ZYX) +ZYX_LEN = len(BASE91X_ZYX) ZYX_MASK = ZYX_LEN - 1 @@ -61,7 +59,7 @@ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # def encode(data: bytes) -> str: - """Encodes bytes to base91 text + """Encodes bytes to base91x text Args: data: input bytes @@ -75,27 +73,27 @@ def encode(data: bytes) -> str: for i in data: collector |= i << bit_collected bit_collected += CHAR_BIT - while BASE91_WORD_BIT <= bit_collected: - cod = BASE91_WORD_MASK & collector - quotient, remainder = divmod(cod, BASE91_LEN) - text += BASE91_ALPHABET[remainder] - text += BASE91_ALPHABET[quotient] - collector >>= BASE91_WORD_BIT - 
bit_collected -= BASE91_WORD_BIT + while BASE91X_WORD_BIT <= bit_collected: + cod = BASE91X_WORD_MASK & collector + quotient, remainder = divmod(cod, BASE91X_LEN) + text += BASE91X_ALPHABET[remainder] + text += BASE91X_ALPHABET[quotient] + collector >>= BASE91X_WORD_BIT + bit_collected -= BASE91X_WORD_BIT if 0 != bit_collected: - cod = BASE91_WORD_MASK & collector - quotient, remainder = divmod(cod, BASE91_LEN) - text += BASE91_ALPHABET[remainder] + cod = BASE91X_WORD_MASK & collector + quotient, remainder = divmod(cod, BASE91X_LEN) + text += BASE91X_ALPHABET[remainder] if 7 <= bit_collected: - text += BASE91_ALPHABET[quotient] + text += BASE91X_ALPHABET[quotient] return text # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # def decode(text: str) -> bytearray: - """Decodes text with the base91 algorithm, skipping wrong symbols + """Decodes text with the base91x algorithm, skipping wrong symbols Args: text: input string with encoded data @@ -111,15 +109,15 @@ def decode(text: str) -> bytearray: i = ord(symbol) if ZYX_MASK < i: continue - digit = BASE91_ZYX[ZYX_MASK & i] + digit = BASE91X_ZYX[ZYX_MASK & i] if -1 == digit: continue if -1 == lower: lower = digit continue - collector |= (BASE91_LEN * digit + lower) << bit_collected - bit_collected += BASE91_WORD_BIT + collector |= (BASE91X_LEN * digit + lower) << bit_collected + bit_collected += BASE91X_WORD_BIT lower = -1 while CHAR_BIT <= bit_collected: @@ -135,37 +133,3 @@ def decode(text: str) -> bytearray: data.append(0xFF & collector) return data - - -# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # - -def main(argv) -> int: - """main function - - Args: - argv[1]: -e|-d - argv[2]: input file - argv[3]: output file - Return: - EXIT_SUCCESS if no exception was thrown - """ - result = 1 - if 4 == len(argv): - if '-e' == argv[1]: - with open(argv[2], 'rb') as in_file: - with open(argv[3], 'wt', encoding='ascii') as out_file: - 
out_file.write(encode(in_file.read())) - result = 0 - elif '-d' == argv[1]: - with open(argv[2], 'rt', encoding='ascii', errors='ignore') as in_file: - with open(argv[3], 'wb') as out_file: - out_file.write(decode(in_file.read())) - result = 0 - - if 0 != result: - print("base91 -e|-d ") - return result - - -if "__main__" == __name__: - sys.exit(main(sys.argv)) diff --git a/test.sh b/test.sh deleted file mode 100644 index d135b32..0000000 --- a/test.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -set -e - -for n in `seq 0 16384`; -do - original="/dev/shm/$n.original" - head --bytes $n < /dev/urandom > $original - md5original=$(md5sum $original | cut -c -32) - #echo $md5original - encoded="/dev/shm/$n.encoded" - ./base91 -e $original $encoded - decoded="/dev/shm/$n.decoded" - ./base91 -d $encoded $decoded - md5decoded=$(md5sum $decoded | cut -c -32) - if [[ "$md5decoded" != "$md5original" ]]; - then - echo "Mismatch:$md5original ~ $md5decoded"; - exit 1 - fi - rm $original $encoded $decoded -done - -echo PASS diff --git a/test_base91.py b/test_base91.py deleted file mode 100644 index 7f70d01..0000000 --- a/test_base91.py +++ /dev/null @@ -1,61 +0,0 @@ -import random -import string -from random import randbytes - -import base91 - -TEXT = "The quick brown\r\nfox\tjumps\nover\rthe lazy\n\rdog!" -PANGRAM = "Thequickbrownfoxjumpsoverthelazydog!" 
-DATA = [88, 136, 162, 112, 31, 156, 195, 75, 208, 5, 61, 106, 20, 163, 227, 172, 240, 150, 163, 100, 63, 170, 82, - 175, 58, 17, 203, 5, 3] - - -def test_static_alphabet(): - assert len(base91.BASE91_ALPHABET) == base91.BASE91_LEN - - -def test_static_decode(): - data = base91.decode(TEXT) - assert list(data) == DATA - - -def test_static_encode(): - text = base91.encode(bytes(DATA)) - assert text == PANGRAM - - -def test_refurbish_small(): - for n in range(33): - original_data = randbytes(n) - text = base91.encode(original_data) - refurbish_data = base91.decode(text) - assert len(original_data) == len(refurbish_data) - assert original_data == refurbish_data, text - - -def test_refurbish_large(): - original_data = randbytes(65536) - text = base91.encode(original_data) - refurbish_data = base91.decode(text) - assert len(original_data) == len(refurbish_data) - assert original_data == refurbish_data, text - - -def test_stress_full_decode(): - text = "" - text_size = random.randint(0, 65536) - while text_size > len(text): - text += chr(random.randint(0, 0x10FFFF)) - assert len(text) == text_size - data = base91.decode(text) - assert len(data) <= text_size - - -def test_stress_ascii_decode(): - text = "" - text_size = random.randint(0, 65536) - while text_size > len(text): - text += random.choice(string.printable) - assert len(text) == text_size - data = base91.decode(text) - assert len(data) <= text_size diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test_base91.cpp b/tests/test_base91x.cpp similarity index 73% rename from test_base91.cpp rename to tests/test_base91x.cpp index ce18543..f9162ab 100644 --- a/test_base91.cpp +++ b/tests/test_base91x.cpp @@ -1,3 +1,26 @@ +/* +MIT License + +Copyright (c) 2023 Roman Babenko + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without 
restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ #include #include #include @@ -5,7 +28,7 @@ #include #include -#include "base91.hpp" +#include "base91x.hpp" #define LOG std::cerr << "L_" << __LINE__ << ": " @@ -20,19 +43,19 @@ bool in_abc(char i) bool test_alphabets() { - if (91 != sizeof(base91::BASE91_ALPHABET)) + if (91 != sizeof(base91x::BASE91X_ALPHABET)) { - LOG << "Wrong size of direct alphabet "<< sizeof(base91::BASE91_ALPHABET) << std::endl; + LOG << "Wrong size of direct alphabet "<< sizeof(base91x::BASE91X_ALPHABET) << std::endl; return false; } - if (0x80 != sizeof(base91::BASE91_ZYX)) + if (0x80 != sizeof(base91x::BASE91X_ZYX)) { LOG << "Wrong size of reverse alphabet" << std::endl; return false; } - for (unsigned n = 0; n < sizeof(base91::BASE91_ZYX); ++n) + for (unsigned n = 0; n < sizeof(base91x::BASE91X_ZYX); ++n) { char digit = -2; if ((92 < n and n < 127) or (39 < n and n < 92) or (36 < n and n < 39)) @@ -46,20 +69,20 @@ bool test_alphabets() else digit = -1; - if (digit != base91::BASE91_ZYX[n]) + if (digit != base91x::BASE91X_ZYX[n]) { LOG << "error on " << n << " element" << std::endl; return false; } } - for (auto &i : base91::BASE91_ALPHABET) 
+ for (auto &i : base91x::BASE91X_ALPHABET) { - if (sizeof(base91::BASE91_ZYX) <= i) + if (sizeof(base91x::BASE91X_ZYX) <= i) { LOG << "out of size " << (i) << " element" << std::endl; return false; } - if (0 > base91::BASE91_ZYX[i]) + if (0 > base91x::BASE91X_ZYX[i]) { LOG << "error on " << i << " element" << std::endl; return false; @@ -71,7 +94,7 @@ bool test_alphabets() } } - // const char digit = BASE91_ZYX[0x7F & i]; + // const char digit = BASE91X_ZYX[0x7F & i]; LOG << "Alphabets tests passed" << std::endl; return true; @@ -86,15 +109,15 @@ bool test_refurbish(const size_t size) std::generate(data_original.begin(), data_original.end(), std::rand); std::string text; - base91::encode(data_original, text); + base91x::encode(data_original, text); size_t n = 0; - // verify whether encoded text contains symbols from base91 alphabet only + // verify whether encoded text contains symbols from base91x alphabet only for (auto it_text : text) { if (not in_abc(it_text)) { - LOG << "error on " << n << " element '" << it_text << "' not in base91::BASE91_ALPHABET" << std::endl; + LOG << "error on " << n << " element '" << it_text << "' not in base91x::BASE91X_ALPHABET" << std::endl; return false; } ++n; @@ -111,7 +134,7 @@ bool test_refurbish(const size_t size) } T data_refurbed; - base91::decode(text, data_refurbed); + base91x::decode(text, data_refurbed); if (data_original.size() != data_refurbed.size()) { @@ -136,15 +159,15 @@ bool test_refurbish(const size_t size) } LOG << "Test passed with size = " << data_original.size() << std::endl; - if (base91::compute_encoded_size(size) != text.size()) + if (base91x::compute_encoded_size(size) != text.size()) { - LOG << "computed " << base91::compute_encoded_size(size) << " != " << text.size() << std::endl; + LOG << "computed " << base91x::compute_encoded_size(size) << " != " << text.size() << std::endl; return false; } - if (base91::assume_decoded_size(text.size()) != size) + if (base91x::assume_decoded_size(text.size()) != 
size) { - LOG << "assumed " << base91::assume_decoded_size(size) << " != " << text.size() << std::endl; + LOG << "assumed " << base91x::assume_decoded_size(text.size()) << " != " << size << std::endl; return false; } @@ -158,7 +181,7 @@ bool test_static_with_space() const std::string pangram_test = "* / * / *The quick brown fox\tjumps\nover\rthe lazy dog { < # > }"; const std::string pangram_expected = "*/*/*Thequickbrownfoxjumpsoverthelazydog{<#>}"; std::vector data; - base91::decode(pangram_test, data); + base91x::decode(pangram_test, data); const std::vector data_expected = {197, 188, 152, 123, 190, 170, 196, 57, 20, 212, 152, 234, 45, 19, 38, 185, 248, 29, 56, 51, 69, 134, 70, 46, 193, 65, 219, 166, 60, 124, 38, 76, 84, 125, 125, 174}; @@ -191,7 +214,7 @@ bool test_static_with_space() } std::string pangram_refurb; - base91::encode(data, pangram_refurb); + base91x::encode(data, pangram_refurb); if (pangram_expected != pangram_refurb) { @@ -210,7 +233,7 @@ bool test_stress(const size_t size) text.resize(size); std::generate(text.begin(), text.end(), std::rand); std::vector data; - base91::decode(text, data); + base91x::decode(text, data); LOG << "Stress test: from " << size << " symbols were decoded " << data.size() << " bytes" << std::endl; LOG << "Stress test passed" << std::endl; return true; diff --git a/tests/test_base91x.py b/tests/test_base91x.py new file mode 100644 index 0000000..79bd7a9 --- /dev/null +++ b/tests/test_base91x.py @@ -0,0 +1,84 @@ +""" +MIT License + +Copyright (c) 2023 Roman Babenko + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright
notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import random +import string +from random import randbytes + +from src.base91x import base91x + +TEXT = "The quick brown\r\nfox\tjumps\nover\rthe lazy\n\rdog!" +PANGRAM = "Thequickbrownfoxjumpsoverthelazydog!" +DATA = [88, 136, 162, 112, 31, 156, 195, 75, 208, 5, 61, 106, 20, 163, 227, 172, 240, 150, 163, 100, 63, 170, 82, + 175, 58, 17, 203, 5, 3] + + +def test_static_alphabet(): + assert len(base91x.BASE91X_ALPHABET) == base91x.BASE91X_LEN + + +def test_static_decode(): + data = base91x.decode(TEXT) + assert list(data) == DATA + + +def test_static_encode(): + text = base91x.encode(bytes(DATA)) + assert text == PANGRAM + + +def test_refurbish_small(): + for n in range(33): + original_data = randbytes(n) + text = base91x.encode(original_data) + refurbish_data = base91x.decode(text) + assert len(original_data) == len(refurbish_data) + assert original_data == refurbish_data, text + + +def test_refurbish_large(): + original_data = randbytes(65536) + text = base91x.encode(original_data) + refurbish_data = base91x.decode(text) + assert len(original_data) == len(refurbish_data) + assert original_data == refurbish_data, text + + +def test_stress_full_decode(): + text = "" + text_size = random.randint(0, 65536) + while text_size > len(text): + text += chr(random.randint(0, 0x10FFFF)) + assert len(text) == text_size + data = base91x.decode(text) + assert len(data) <= text_size + + +def 
test_stress_ascii_decode(): + text = "" + text_size = random.randint(0, 65536) + while text_size > len(text): + text += random.choice(string.printable) + assert len(text) == text_size + data = base91x.decode(text) + assert len(data) <= text_size