diff --git a/.github/workflows/bats.yml b/.github/workflows/bats.yml index 0c4d250..29250b2 100644 --- a/.github/workflows/bats.yml +++ b/.github/workflows/bats.yml @@ -7,14 +7,14 @@ jobs: runs-on: ubuntu-latest strategy: - max-parallel: 4 + max-parallel: 6 matrix: - python-version: [3.7, 3.8, 3.9] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/README.md b/README.md index f836fe2..fcac1a8 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # Crypt4GH Encryption Utility -`crypt4gh`is a Python tool to encrypt, decrypt or re-encrypt files, according to the [GA4GH encryption file format](https://www.ga4gh.org/news/crypt4gh-a-secure-method-for-sharing-human-genetic-data/). +`crypt4gh` is a Python tool to encrypt, decrypt or re-encrypt files, according to the [GA4GH encryption file format](https://www.ga4gh.org/news/crypt4gh-a-secure-method-for-sharing-human-genetic-data/). ## Installation diff --git a/crypt4gh/__init__.py b/crypt4gh/__init__.py index 783278f..ad47ab5 100644 --- a/crypt4gh/__init__.py +++ b/crypt4gh/__init__.py @@ -37,7 +37,7 @@ __title__ = 'GA4GH cryptographic utilities' -__version__ = '1.6' # VERSION in header is 1 (as 4 bytes little endian) +__version__ = '1.7' # VERSION in header is 1 (as 4 bytes little endian) __author__ = 'Frédéric Haziza' __author_email__ = 'frederic.haziza@crg.eu' __license__ = 'Apache License 2.0' diff --git a/crypt4gh/cli.py b/crypt4gh/cli.py index 1859787..345fdfe 100644 --- a/crypt4gh/cli.py +++ b/crypt4gh/cli.py @@ -26,10 +26,10 @@ Utility for the cryptographic GA4GH standard, reading from stdin and outputting to stdout. Usage: - {PROG} [-hv] [--log ] encrypt [--sk ] --recipient_pk [--recipient_pk ]... 
[--range ] + {PROG} [-hv] [--log ] encrypt [--sk ] --recipient_pk [--recipient_pk ]... [--range ] [--header ] {PROG} [-hv] [--log ] decrypt [--sk ] [--sender_pk ] [--range ] {PROG} [-hv] [--log ] rearrange [--sk ] --range - {PROG} [-hv] [--log ] reencrypt [--sk ] --recipient_pk [--recipient_pk ]... [--trim] + {PROG} [-hv] [--log ] reencrypt [--sk ] --recipient_pk [--recipient_pk ]... [--trim] [--header-only] Options: -h, --help Prints this help and exit @@ -41,7 +41,8 @@ --sender_pk Peer's Curve25519-based Public key to verify provenance (akin to signature) --range Byte-range either as or just (Start included, End excluded) -t, --trim Keep only header packets that you can decrypt - + --header Where to write the header (default: stdout) + --header-only Whether the input data consists only of a header (default: false) Environment variables: C4GH_LOG If defined, it will be used as the default logger @@ -146,11 +147,20 @@ def build_recipients(): if not recipient_keys: raise ValueError("No Recipients' Public Key found") - lib.encrypt(recipient_keys, - sys.stdin.buffer, - sys.stdout.buffer, - offset = range_start, - span = range_span) + header = args["--header"] + + try: + if header: + header = open(header, 'wb') # let it raise exception + lib.encrypt(recipient_keys, + sys.stdin.buffer, + sys.stdout.buffer, + headerfile = header, + offset = range_start, + span = range_span) + finally: + if header: + header.close() def decrypt(args): @@ -212,4 +222,5 @@ def build_recipients(): recipient_keys, sys.stdin.buffer, sys.stdout.buffer, - trim=args['--trim']) + trim=args['--trim'], + header_only=args['--header-only']) diff --git a/crypt4gh/lib.py b/crypt4gh/lib.py index 601797b..341795c 100644 --- a/crypt4gh/lib.py +++ b/crypt4gh/lib.py @@ -46,7 +46,7 @@ def _encrypt_segment(data, process, key): @close_on_broken_pipe -def encrypt(keys, infile, outfile, offset=0, span=None): +def encrypt(keys, infile, outfile, headerfile=None, offset=0, span=None): '''Encrypt infile into outfile, 
using the list of keys. @@ -57,6 +57,8 @@ def encrypt(keys, infile, outfile, offset=0, span=None): ''' LOG.info('Encrypting the file') + + headerfile = headerfile or outfile # Forward to start position LOG.debug(" Start Coordinate: %s", offset) @@ -91,7 +93,7 @@ def encrypt(keys, infile, outfile, offset=0, span=None): header_bytes = header.serialize(header_packets) LOG.debug('header length: %d', len(header_bytes)) - outfile.write(header_bytes) + headerfile.write(header_bytes) # ...and cue music LOG.debug("Streaming content") @@ -405,7 +407,7 @@ def decrypt(keys, infile, outfile, sender_pubkey=None, offset=0, span=None): @close_on_broken_pipe -def reencrypt(keys, recipient_keys, infile, outfile, chunk_size=4096, trim=False): +def reencrypt(keys, recipient_keys, infile, outfile, chunk_size=4096, trim=False, header_only=False): '''Extract header packets from infile and generate another one to outfile. The encrypted data section is only copied from infile to outfile.''' @@ -414,6 +416,11 @@ def reencrypt(keys, recipient_keys, infile, outfile, chunk_size=4096, trim=False packets = header.reencrypt(header_packets, keys, recipient_keys, trim=trim) outfile.write(header.serialize(packets)) + # If header-only reencryption, we are done. 
+ if header_only: + LOG.info(f'Header-only reencryption Successful') + return + # Stream the remainder LOG.info(f'Streaming the remainder of the file') while True: diff --git a/docs/conf.py b/docs/conf.py index 320df70..bde48db 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -130,7 +130,7 @@ today_fmt = '%B %d, %Y' def setup(app): - app.add_stylesheet('custom.css') + app.add_css_file('custom.css') # -- Other stuff ---------------------------------------------------------- htmlhelp_basename = 'crypt4gh' diff --git a/docs/usage.rst b/docs/usage.rst index 8d95c3f..a84ff0b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -64,3 +64,26 @@ Any user can generate a keypair with: $ crypt4gh-keygen --sk user.sec --pk user.pub The private key will be encrypted with a passphrase. The user is prompted at the terminal for that passphrase. + +Storing the encrypted header separately +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The encrypted header can be stored separately from the encrypted data. This is useful, for example, when sharing the encrypted message with many recipients. In this case, only the header needs to be re-encrypted (for a specific recipient) while the encrypted data can stay the same. + +To store the encrypted header in a separate file, for example ``header.bob.c4gh``, use the flag ``--header``: + +.. code-block:: console + + $ crypt4gh encrypt --sk alice.sec --recipient_pk bob.pub --header header.bob.c4gh < M > M.data.c4gh + +Bob can then decrypt the message by concatenating the header and the data, and decrypting the whole file: + +.. code-block:: console + + $ cat header.bob.c4gh M.data.c4gh | crypt4gh decrypt --sk bob.sec > M + +To re-encrypt the message for another user Eve, with public key ``eve.pub``, Alice can run the ``crypt4gh reencrypt`` command on a header that her own secret key can decrypt (``header.alice.c4gh`` below, i.e. a header that was encrypted with ``alice.pub`` as a recipient): + +.. 
code-block:: console + + $ crypt4gh reencrypt --sk alice.sec --recipient_pk eve.pub < header.alice.c4gh > header.eve.c4gh diff --git a/setup.py b/setup.py index 295f085..5b3bb88 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ _readme = (Path(__file__).parent / "README.md").read_text() setup(name='crypt4gh', - version='1.6', + version='1.7', url='https://www.github.com/EGA-archive/crypt4gh', license='Apache License 2.0', author='Frédéric Haziza', diff --git a/tests/header_stream.bats b/tests/header_stream.bats new file mode 100644 index 0000000..123785b --- /dev/null +++ b/tests/header_stream.bats @@ -0,0 +1,54 @@ +#!/usr/bin/env bats + +load _common/helpers + +function setup() { + + # Defining the TMP dir + TESTFILES=${BATS_TEST_FILENAME}.d + mkdir -p "$TESTFILES" + +} + +function teardown() { + rm -rf ${TESTFILES} +} + +@test "Bob sends the testfile secretly (with separate header and data) to Alice" { + + TESTFILE=${BATS_TEST_DIRNAME}/_common/testfile.abcd + + # Bob encrypts the testfile for Alice, storing the header separately + export C4GH_PASSPHRASE=${BOB_PASSPHRASE} + crypt4gh encrypt --sk ${BOB_SECKEY} --recipient_pk ${ALICE_PUBKEY} --header $TESTFILES/header.alice.c4gh < $TESTFILE > $TESTFILES/data.c4gh + + # Alice concatenates the header and the data and decrypts the combined result + export C4GH_PASSPHRASE=${ALICE_PASSPHRASE} + cat $TESTFILES/header.alice.c4gh $TESTFILES/data.c4gh | crypt4gh decrypt --sk ${ALICE_SECKEY} > $TESTFILES/message.received + + run diff $TESTFILE $TESTFILES/message.received + [ "$status" -eq 0 ] + + unset C4GH_PASSPHRASE +} + +@test "Bob encrypts the testfile for himself (with separate header) and reencrypts the header for Alice" { + + TESTFILE=${BATS_TEST_DIRNAME}/_common/testfile.abcd + + # Bob encrypts the testfile for himself + export C4GH_PASSPHRASE=${BOB_PASSPHRASE} + crypt4gh encrypt --sk ${BOB_SECKEY} --recipient_pk ${BOB_PUBKEY} --header $TESTFILES/header.bob.c4gh < $TESTFILE > $TESTFILES/data.c4gh + + # Bob changes 
the header for Alice + crypt4gh reencrypt --sk ${BOB_SECKEY} --recipient_pk ${ALICE_PUBKEY} --header-only < $TESTFILES/header.bob.c4gh > $TESTFILES/header.alice.c4gh + + # Alice concatenates the header and data and decrypts the results + export C4GH_PASSPHRASE=${ALICE_PASSPHRASE} + cat $TESTFILES/header.alice.c4gh $TESTFILES/data.c4gh | crypt4gh decrypt --sk ${ALICE_SECKEY} > $TESTFILES/message.received + + run diff $TESTFILE $TESTFILES/message.received + [ "$status" -eq 0 ] + + unset C4GH_PASSPHRASE +} \ No newline at end of file