Skip to content

Commit

Permalink
Merge pull request #303 from artefactory/ci/update-ci-cd
Browse files Browse the repository at this point in the history
ci: update ci cd
  • Loading branch information
julesbertrand authored Nov 17, 2023
2 parents 2a94bba + d1f85f8 commit 19f2d26
Show file tree
Hide file tree
Showing 32 changed files with 210 additions and 5,831 deletions.
3 changes: 3 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# https://help.github.com/en/articles/about-code-owners

* @julesbertrand @amaleelhamri @hugovasselin @Guillaume6606
3 changes: 3 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ updates:
time: "09:00"
allow:
- dependency-type: "all"
ignore:
- dependency-name: "*"
update-types: ["version-update:semver-patch"]
labels:
- draft
- dependencies
Expand Down
28 changes: 14 additions & 14 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,22 @@ jobs:
runs-on: ubuntu-latest

steps:

- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3

- name: Login to Github Container Registry
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
registry: ghcr.io

- name: Set tag name
id: tag
run: echo ::set-output name=tag_name::${GITHUB_REF#*\/*\/}
run: echo "tag_name=${GITHUB_REF//\//-}" >> $GITHUB_OUTPUT
env:
GITHUB_REF: ${{ github.ref }}

Expand All @@ -47,7 +46,8 @@ jobs:
id: scan
with:
image: "ghcr.io/artefactory/nlpretext:${{ steps.tag.outputs.tag_name }}"
acs-report-enable: true
output-format: table

- name: upload Anchore scan SARIF report
if: success() || failure()
uses: github/codeql-action/upload-sarif@v1
Expand All @@ -65,9 +65,10 @@ jobs:
steps:

- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2.2.2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

Expand All @@ -77,7 +78,7 @@ jobs:
make download-poetry
- name: Set up cache
uses: actions/cache@v3.2.4
uses: actions/cache@v3.3.2
with:
path: ~/.cache/pypoetry/virtualenvs
key: venv-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('poetry.lock') }}
Expand All @@ -88,7 +89,7 @@ jobs:
- name: Install dependencies
run: |
poetry install -E torch
poetry install -E torch -E dask
- name: Publish to PyPI
env:
Expand All @@ -99,8 +100,7 @@ jobs:
- name: Run build script for Sphinx pages
run: |
source $(poetry env info | grep Path | awk '{ print $2 }')/bin/activate
git config --global user.name "Github-Pages Bot"
git config --global user.email "[email protected]"
sh docs/scripts/buildsite.sh
poetry run git config --global user.name "Github-Pages Bot"
poetry run git config --global user.email "[email protected]"
poetry run sh docs/scripts/buildsite.sh
shell: bash
21 changes: 9 additions & 12 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,8 @@ on:
branches:
- main
pull_request:
types:
- opened
- reopened
- edited
- labeled
- unlabeled
- synchronize
branches:
- '*'

jobs:
ci:
Expand All @@ -40,16 +35,18 @@ jobs:

steps:
- uses: actions/checkout@v2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2.2.2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'

- name: Install poetry
run: make download-poetry

- name: Set up pip cache
uses: actions/cache@v3.2.4
uses: actions/cache@v3.3.2
with:
path: ~/.cache/pypoetry/virtualenvs
key: venv-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('poetry.lock') }}
Expand All @@ -67,15 +64,15 @@ jobs:
- name: Install dependencies
run: |
poetry run pip install --upgrade pip
poetry install -E torch
poetry install -E torch -E dask
- name: Run safety checks
run: |
STRICT=1 make check-safety
- name: Run style checks
- name: Lint and format
run: |
STRICT=1 make check-style
make format-code
- name: Run tests
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ coverage.xml
*.cover
.hypothesis/
.pytest_cache/
.ruff_cache/

# Translations
*.mo
Expand Down
29 changes: 15 additions & 14 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
default_language_version:
python: python3.8
python: python3.10


repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
rev: v4.5.0
hooks:
- id: check-yaml
stages: [commit, push]
- id: end-of-file-fixer
stages: [commit, push]
exclude: ".ipynb"
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-toml
- id: check-json
- id: check-added-large-files

- repo: local
hooks:
Expand All @@ -22,7 +23,7 @@ repos:
stages: [commit, push]
- id: pyupgrade
name: pyupgrade
entry: poetry run pyupgrade --py37-plus
entry: poetry run pyupgrade --py38-plus
types: [python]
language: system
stages: [commit, push]
Expand All @@ -32,19 +33,19 @@ repos:
types: [python]
language: system
stages: [commit, push]
- id: ruff
name: ruff
entry: poetry run ruff check --config pyproject.toml
types: [python]
language: system
stages: [commit, push]
- id: mypy
name: mypy
entry: poetry run mypy
require_serial: true
types: [python]
language: system
stages: [push]
- id: darglint
name: darglint
entry: poetry run darglint -v 2
types: [python]
language: system
stages: [push]
- id: gitleaks
name: gitleaks
entry: make gitleaks
Expand Down
13 changes: 3 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ download-poetry:

.PHONY: install
install:
poetry env use python3.8
poetry env use python3.10
poetry lock -n
poetry install -n
ifneq ($(NO_PRE_COMMIT), 1)
Expand All @@ -106,23 +106,16 @@ gitleaks:
commits="$$(git rev-list --ancestry-path $$(git rev-parse $$(git branch -r --sort=committerdate | tail -1))..$$(git rev-parse HEAD))"; \
if [ "$${commits}" != "" ]; then docker run --rm -v $$(pwd):/code/ zricethezav/gitleaks --path=/code/ -v --commits=$$(echo $${commits} | paste -s -d, -)$(SECRETS_COMMAND_FLAG); fi;

.PHONY: check-style
check-style:
poetry run black --config pyproject.toml --diff --check ./$(BLACK_COMMAND_FLAG) && \
poetry run darglint -v 2 **/*.py$(DARGLINT_COMMAND_FLAG) && \
poetry run isort --settings-path pyproject.toml --check-only **/*.py$(ISORT_COMMAND_FLAG) && \
poetry run mypy --config-file setup.cfg nlpretext tests/**.py$(MYPY_COMMAND_FLAG)

.PHONY: format-code
format-code:
poetry run pre-commit run
poetry run pre-commit run --all

.PHONY: test
test:
poetry run pytest

.PHONY: lint
lint: test check-safety check-style
lint: check-safety format-code test

# Example: make docker VERSION=latest
# Example: make docker IMAGE=some_name VERSION=1.0.4
Expand Down
53 changes: 28 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ All the goto functions you need to handle NLP use-cases, integrated in NLPretext
# TL;DR


> *Working on an NLP project and tired of always looking for the same silly preprocessing functions on the web?* :tired_face:
> *Working on an NLP project and tired of always looking for the same silly preprocessing functions on the web?* :tired_face:
> *Need to efficiently extract email addresses from a document? Hashtags from tweets? Remove accents from a French post?* :disappointed_relieved:

**NLPretext got you covered!** :rocket:

NLPretext packages in a **unique** library all the text **preprocessing** functions you need to **ease** your NLP project.
NLPretext packages in a **unique** library all the text **preprocessing** functions you need to **ease** your NLP project.


:mag: Quickly explore our preprocessing pipelines and the reference of individual functions below.
Expand Down Expand Up @@ -215,7 +215,7 @@ print(example)

## Data augmentation <a name="data_augmentation"></a>

The augmentation module helps you to **generate new texts** based on your given examples by modifying some words in the initial ones and to **keep associated entities unchanged**, if any, in the case of **NER tasks**. If you want words other than entities to remain unchanged, you can specify it within the `stopwords` argument. Modifications depend on the chosen method, the ones currently supported by the module are **substitutions with synonyms** using Wordnet or BERT from the [`nlpaug`](https://github.com/makcedward/nlpaug) library.
The augmentation module helps you to **generate new texts** based on your given examples by modifying some words in the initial ones and to **keep associated entities unchanged**, if any, in the case of **NER tasks**. If you want words other than entities to remain unchanged, you can specify them within the `stopwords` argument. Modifications depend on the chosen method; the ones currently supported by the module are **substitutions with synonyms** using WordNet or BERT from the [`nlpaug`](https://github.com/makcedward/nlpaug) library.

```python
from nlpretext.augmentation.text_augmentation import augment_text
Expand Down Expand Up @@ -270,30 +270,33 @@ This project is licensed under the terms of the `Apache Software License 2.0` li
# Project Organization
------------

.
├── .github/workflows <- Where the CI and CD lives
├── datasets/external <- Bash scripts to download external datasets
├── docker <- All you need to build a Docker image from that package
├── docs <- Sphinx HTML documentation
├── nlpretext <- Main Package. This is where the code lives
│   ├── preprocessor.py <- Main preprocessing script
│   ├── text_loader.py <- Main loading script
│   ├── augmentation <- Text augmentation script
│   ├── basic <- Basic text preprocessing
│   ├── cli <- Command lines that can be used
│   ├── social <- Social text preprocessing
│   ├── token <- Token text preprocessing
│  ├── textloader <- File loading
│   ├── _config <- Where the configuration and constants live
│   └── _utils <- Where preprocessing utils scripts lives
├── references <- assets
├── tests <- Where the tests lives
├── .gitignore
├── .pre-commit-config.yaml <- Pre-commit configuration
├── CODE_OF_CONDUCT.md <- Code of conduct guidelines
├── CONTRIBUTING.md <- Contribution guidelines
├── LICENSE
├── CONTRIBUTING.md <- Contribution guidelines
├── CODE_OF_CONDUCT.md <- Code of conduct guidelines
├── Makefile
├── README.md <- The top-level README for developers using this project.
├── .github/workflows <- Where the CI and CD live
├── datasets/external <- Bash scripts to download external datasets
├── docker <- All you need to build a Docker image from that package
├── docs <- Sphinx HTML documentation
├── nlpretext <- Main Package. This is where the code lives
│   ├── preprocessor.py <- Main preprocessing script
│   ├── text_loader.py <- Main loading script
│   ├── augmentation <- Text augmentation script
│   ├── basic <- Basic text preprocessing
│   ├── cli <- Command lines that can be used
│   ├── social <- Social text preprocessing
│   ├── token <- Token text preprocessing
│  ├── textloader <- File loading
│   ├── _config <- Where the configuration and constants live
│   └── _utils <- Where preprocessing utils scripts lives
├── tests <- Where the tests live
├── pyproject.toml <- Package configuration
├── poetry.lock
└── setup.cfg <- Configuration for plugins and other utils
├── pyproject.toml <- Package build configuration
├── README.md <- The top-level README for developers using this project.
└── SECURITY.md

# Credits

Expand Down
2 changes: 1 addition & 1 deletion SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
Vulnerabilities can be reported by emailing core members:

- artefactory [rafaelle.aygalenq@artefact.com](mailto:rafaelle.aygalenq@artefact.com)
- artefactory [jules.bertrand@artefact.com](mailto:jules.bertrand@artefact.com)

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

Expand Down
2 changes: 1 addition & 1 deletion datasets/external/get_stanfordtweets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#!/bin/bash
#!/bin/bash
wget -O trainingandtestdata.zip http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip trainingandtestdata.zip
mkdir -p tweets_sentiment && cp trainingandtestdata.zip tweets_sentiment && cd tweets_sentiment && unzip trainingandtestdata.zip
24 changes: 9 additions & 15 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9.7-slim-buster
FROM python:3.10-slim-buster

ENV LANG=C.UTF-8 \
LC_ALL=C.UTF-8
Expand All @@ -8,24 +8,18 @@ RUN apt-get update && \
curl coreutils \
&& rm -rf /var/lib/apt/lists/*

RUN useradd -d /home/docker_user -m -s /bin/bash docker_user
USER docker_user
# Install Poetry
ENV POETRY_VERSION=1.5.1
RUN pip install --upgrade pip
RUN python3 -m pip install "poetry==$POETRY_VERSION"

RUN mkdir -p /home/docker_user/workspace
WORKDIR /home/docker_user/workspace

# Install Poetry
RUN curl -sSL -o install-poetry.py https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py \
&& echo 'daad01ac0c1636f1c0154575c6b3b37a0867e9cedd67d1224fc4259c07b03a86 install-poetry.py' | sha256sum --check \
&& POETRY_HOME=/home/docker_user/poetry python install-poetry.py \
&& rm install-poetry.py

ENV PATH="${PATH}:/home/docker_user/.poetry/bin:/home/docker_user/poetry/bin"
WORKDIR /home/workspace

COPY pyproject.toml ./
COPY poetry.lock ./

RUN poetry install --no-root --no-dev
RUN poetry config virtualenvs.create false \
&& poetry lock \
&& poetry install --no-root --no-dev --no-interaction

COPY . /home/docker_user/workspace/

Expand Down
2 changes: 1 addition & 1 deletion docs/source/tutorials/basic_notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,4 @@
},
"nbformat": 4,
"nbformat_minor": 1
}
}
Loading

0 comments on commit 19f2d26

Please sign in to comment.