Skip to content

Commit

Permalink
New name base91x and prepared for PYPI release
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Feb 19, 2023
1 parent f673597 commit 350bd8d
Show file tree
Hide file tree
Showing 18 changed files with 494 additions and 316 deletions.
222 changes: 117 additions & 105 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,119 +6,131 @@ on:
pull_request:
branches: [ main ]
release:
types: [created]
types: [ released ]

jobs:

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

test:

runs-on: ubuntu-latest

steps:

- name: Dump GitHub context
env:
GITHUB_CONTEXT: ${{ toJSON(github) }}
run: echo "$GITHUB_CONTEXT"

- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}

# c++

- name: CMake
run: cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_FLAGS=" -O " .

- name: Make
run: make all

- name: C++ Unit Tests
run: make test

- name: C++ Performance Tests
run: ./perf_base91

# python:

- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9

- name: Install pytest
run: pip install pytest pytest-random-order pytest-repeat

- name: Python Unit Tests
run: pytest --count=100 --random-order-bucket=global

- name: Python Performance Test
run: python perf_base91.py

# cross tests

- name: Make random bytes
run: dd if=/dev/urandom of=orig.dat bs=1 count=$RANDOM

- name: Encode with python
run: python base91.py -e orig.dat py.txt

- name: Encode with cpp
run: ./base91 -e orig.dat cpp.txt

- name: Compare text
run: diff py.txt cpp.txt

- name: Decode with cpp
run: ./base91 -d py.txt cpp.dat

- name: Decode with python
run: python base91.py -d cpp.txt py.dat

- name: Compare refurbish data
run: |
HASH_ORIG=$(sha256sum orig.dat | cut -c -64)
echo "HASH_ORIG='${HASH_ORIG}'"
HASH_CPP=$(sha256sum cpp.dat | cut -c -64)
echo "HASH_CPP='${HASH_CPP}'"
HASH_PY=$(sha256sum py.dat | cut -c -64)
echo "HASH_PY='${HASH_PY}'"
if [ -n "${HASH_ORIG}" ] && [ "${HASH_ORIG}" == "${HASH_CPP}" ] && [ "${HASH_ORIG}" == "${HASH_PY}" ]; then
echo "OK"
else
echo "FAIL"
exit 1
fi
- name: Trash decoding with python
run: python base91.py -d orig.dat trash_py.dat

- name: Trash decoding with cpp
run: ./base91 -d orig.dat trash_cpp.dat

- name: Compare trash data
run: |
TRASH_HASH_CPP=$(sha256sum trash_cpp.dat | cut -c -64)
echo "TRASH_HASH_CPP='${TRASH_HASH_CPP}'"
TRASH_HASH_PY=$(sha256sum trash_py.dat | cut -c -64)
echo "TRASH_HASH_PY='${TRASH_HASH_PY}'"
if [ -n "${TRASH_HASH_CPP}" ] && [ "${TRASH_HASH_CPP}" == "${TRASH_HASH_PY}" ]; then
echo "OK"
else
echo "FAIL"
exit 1
fi
- name: Failure data upload
if: ${{ failure() }}
uses: actions/upload-artifact@v3
with:
name: crash_data
path: ./*.dat

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}

# # # # c++

- name: CMake
run: cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_FLAGS=" -O " .

- name: Make
run: make all

- name: C++ Unit Tests
run: make test

- name: C++ Performance Tests
run: make perf

# # # # python:

- name: Set up latest Python
uses: actions/setup-python@v3
with:
python-version: 3.x

- name: Update PIP
run: python -m pip install --upgrade pip

- name: Install requirements
run: pip install --requirement requirements.txt

- name: Update PIP
run: python -m pip install --upgrade pip

- name: Python Unit Tests
run: pytest --count=10 --random-order-bucket=global

- name: Python Performance Test
run: python -m perf

- name: Build package
run: python -m build

- name: Install package
run: python -m pip install $(find dist -type f -wholename "*.whl")

# # # # cross tests

- name: Make random bytes
run: dd if=/dev/urandom of=orig.dat bs=1 count=$RANDOM

- name: Encode with python
run: python -m base91x -e orig.dat py.txt

- name: Encode with cpp
run: ./base91x -e orig.dat cpp.txt

- name: Compare text
run: diff py.txt cpp.txt

- name: Decode with cpp
run: ./base91x -d py.txt cpp.dat

- name: Decode with python
run: python -m base91x -d cpp.txt py.dat

- name: Compare refurbish data
run: |
HASH_ORIG=$(sha256sum orig.dat | cut -c -64)
echo "HASH_ORIG='${HASH_ORIG}'"
HASH_CPP=$(sha256sum cpp.dat | cut -c -64)
echo "HASH_CPP='${HASH_CPP}'"
HASH_PY=$(sha256sum py.dat | cut -c -64)
echo "HASH_PY='${HASH_PY}'"
if [ -n "${HASH_ORIG}" ] && [ "${HASH_ORIG}" == "${HASH_CPP}" ] && [ "${HASH_ORIG}" == "${HASH_PY}" ]; then
echo "OK"
else
echo "FAIL"
exit 1
fi
- name: Trash decoding with python
run: python -m base91x -d orig.dat trash_py.dat

- name: Trash decoding with cpp
run: ./base91x -d orig.dat trash_cpp.dat

- name: Compare trash data
run: |
TRASH_HASH_CPP=$(sha256sum trash_cpp.dat | cut -c -64)
echo "TRASH_HASH_CPP='${TRASH_HASH_CPP}'"
TRASH_HASH_PY=$(sha256sum trash_py.dat | cut -c -64)
echo "TRASH_HASH_PY='${TRASH_HASH_PY}'"
if [ -n "${TRASH_HASH_CPP}" ] && [ "${TRASH_HASH_CPP}" == "${TRASH_HASH_PY}" ]; then
echo "OK"
else
echo "FAIL"
exit 1
fi
- name: Failure data upload
if: ${{ failure() }}
uses: actions/upload-artifact@v3
with:
name: crash_data
path: ./*.dat

# # # # deploy if release

- name: Publish
if: ${{ 'release' == github.event_name }}
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_TOKEN }}
23 changes: 15 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
cmake_minimum_required(VERSION 3.10)
project(base91)
project(base91x)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -pedantic -Wall -Wextra ")

add_executable(base91 base91.cpp)
# includes
INCLUDE_DIRECTORIES(./include/)

add_executable(perf_base91 perf_base91.cpp perf_base91.cpp)

add_executable(test_base91 test_base91.cpp test_base91.cpp)

add_test(NAME test_base91 COMMAND test_base91)
# main executable
add_executable(${PROJECT_NAME} src/base91x.cpp)

# unit tests
set(PROJECT_TEST_NAME test_${PROJECT_NAME})
add_executable(${PROJECT_TEST_NAME} tests/test_base91x.cpp)
add_test(NAME ${PROJECT_TEST_NAME} COMMAND ${PROJECT_TEST_NAME})
enable_testing()

install(TARGETS base91)
# performance test - use make perf
set(PROJECT_PERF_NAME perf_${PROJECT_NAME})
add_executable(${PROJECT_PERF_NAME} perf/perf_base91x.cpp)
ADD_CUSTOM_TARGET(perf COMMAND ${PROJECT_PERF_NAME})

install(TARGETS ${PROJECT_NAME})
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# base91x

This base91 method provides data encoding and decoding
This base91x method provides data encoding and decoding
using a numeric system of base 91 with a specific alphabet that does not require
escaping any symbols in a C or C++ (and many other languages?) string.
The 'x' means the alphabet was obtained with the XOR function.
Expand All @@ -22,13 +22,14 @@ There is possibility to extend the algorithm to use 89 codes during decode.

The alphabet is derived from the base91 value with the operation XOR(0x7F), with three exceptions.

The alphabet:
The alphabet in the order:

```
!~}|{zyxwvutsrqponmlkjihgfedcba`_^]#[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)($&%
```
PAY ATTENTION:
Encoded string may have unpleased sequence /* or */

### PAY ATTENTION:
The encoded string may contain the awkward sequences ``/*`` or ``*/``.
This may break C or C++ code when the string is placed into the code.
However, the sequence %%% should not appear, so the encoded string can be placed in a raw string literal:

Expand Down
32 changes: 16 additions & 16 deletions base91.hpp → include/base91x.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ SOFTWARE.

//------------------------------------------------------------------------------
/**
* Class base91 provides encoding and decoding statics methods
* Class base91x provides static encoding and decoding methods
* using numeric system of base 91 with specific alphabet that does not require
* escaping any symbols in C, C++ string.
* The alphabet contains printable characters of ASCII except:
Expand All @@ -48,16 +48,16 @@ SOFTWARE.
* There is possibility to extend the algorithm to use 89 codes during decode.
*/

class base91
class base91x
{
public:
/** Base of the numeric system is 91 decimal, which equals the code of the ASCII symbol [ */
static const char BASE91_LEN = 91;
static const char BASE91X_LEN = 91;

/** Bits in one byte. Should be 8 */
static const unsigned char_bit = CHAR_BIT;

/** Pair of base91 symbols might code 13 bits */
/** A pair of base91x symbols can encode 13 bits */
static const unsigned b91word_bit = 13;

/** 8192 possible values for 13 bits */
Expand All @@ -72,15 +72,15 @@ class base91
/** Mask for reverse alphabet */
static constexpr unsigned zyx_mask = ZYX_LEN - 1;

/** BASE91 JSON OPTIMIZED ALPHABET: */
static constexpr unsigned char BASE91_ALPHABET[BASE91_LEN] = {'!', '~', '}', '|', '{', 'z', 'y', 'x', 'w', 'v',
/** base91x JSON OPTIMIZED ALPHABET: */
static constexpr unsigned char BASE91X_ALPHABET[BASE91X_LEN] = {'!', '~', '}', '|', '{', 'z', 'y', 'x', 'w', 'v',
'u', 't', 's', 'r', 'q', 'p', 'o', 'n', 'm', 'l', 'k', 'j', 'i', 'h', 'g', 'f', 'e', 'd', 'c', 'b', 'a', '`',
'_', '^', ']', '#', '[', 'Z', 'Y', 'X', 'W', 'V', 'U', 'T', 'S', 'R', 'Q', 'P', 'O', 'N', 'M', 'L', 'K', 'J',
'I', 'H', 'G', 'F', 'E', 'D', 'C', 'B', 'A', '@', '?', '>', '=', '<', ';', ':', '9', '8', '7', '6', '5', '4',
'3', '2', '1', '0', '/', '.', '-', ',', '+', '*', ')', '(', '$', '&', '%'};

/** BASE91 reverse table for quick decoding */
static constexpr char BASE91_ZYX[ZYX_LEN]
/** base91x reverse table for quick decoding */
static constexpr char BASE91X_ZYX[ZYX_LEN]
= {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, 0, -1, 35, 88, 90, 89, -1, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76,
75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50,
Expand Down Expand Up @@ -142,21 +142,21 @@ class base91
bit_collected += char_bit;
while (b91word_bit <= bit_collected)
{
div_t d = std::div(b91word_mask & collector, BASE91_LEN);
text.push_back(BASE91_ALPHABET[d.rem]);
text.push_back(BASE91_ALPHABET[d.quot]);
div_t d = std::div(b91word_mask & collector, BASE91X_LEN);
text.push_back(BASE91X_ALPHABET[d.rem]);
text.push_back(BASE91X_ALPHABET[d.quot]);
collector >>= b91word_bit;
bit_collected -= b91word_bit;
}
}

if (0 != bit_collected)
{
const div_t d = std::div(b91word_mask & collector, BASE91_LEN);
text.push_back(BASE91_ALPHABET[d.rem]);
const div_t d = std::div(b91word_mask & collector, BASE91X_LEN);
text.push_back(BASE91X_ALPHABET[d.rem]);
if (7 <= bit_collected)
{
text.push_back(BASE91_ALPHABET[d.quot]);
text.push_back(BASE91X_ALPHABET[d.quot]);
}
}
}
Expand Down Expand Up @@ -184,7 +184,7 @@ class base91
{
continue;
}
const char digit = BASE91_ZYX[zyx_mask & i];
const char digit = BASE91X_ZYX[zyx_mask & i];
if (-1 == digit)
{
continue;
Expand All @@ -195,7 +195,7 @@ class base91
continue;
}

collector |= (BASE91_LEN * digit + lower) << bit_collected;
collector |= (BASE91X_LEN * digit + lower) << bit_collected;
bit_collected += b91word_bit;
lower = -1;

Expand Down
Loading

0 comments on commit 350bd8d

Please sign in to comment.