
Commit

Merge remote-tracking branch 'upstream/master' into fix_bf16_convert_model
popovaan committed Apr 18, 2024
2 parents cc44edd + 7b7650e commit 16191ed
Showing 341 changed files with 9,153 additions and 3,737 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
@@ -65,3 +65,5 @@
*.vsdx filter=lfs diff=lfs merge=lfs -text
*.bmp filter=lfs diff=lfs merge=lfs -text
*.svg filter=lfs diff=lfs merge=lfs -text
.github/scripts/workflow_rerun/tests/data/log_archive_with_error.zip filter=lfs diff=lfs merge=lfs -text
.github/scripts/workflow_rerun/tests/data/log_archive_wo_error.zip filter=lfs diff=lfs merge=lfs -text
65 changes: 65 additions & 0 deletions .github/scripts/external_pr_labeller.py
@@ -0,0 +1,65 @@
import argparse
import logging
import os

from github import Github, Auth


def get_arguments() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-r',
        '--repository-name',
        type=str,
        required=True,
        help='Repository name in the OWNER/REPOSITORY format',
    )
    parser.add_argument(
        '--pr-number', type=int, required=True, help='PR number to label'
    )
    return parser.parse_args()


def init_logger():
    LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
    logging.basicConfig(
        level=LOGLEVEL,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%m-%d-%Y %H:%M:%S',
    )


if __name__ == '__main__':

    init_logger()

    LOGGER = logging.getLogger('labeller')
    EXTERNAL_PR_LABEL_NAME = 'ExternalPR'

    args = get_arguments()
    pr_number = args.pr_number
    repository_name = args.repository_name

    github = Github(auth=Auth.Token(token=os.environ.get('GITHUB_TOKEN')))
    gh_repo = github.get_repo(full_name_or_id=repository_name)

    pr = gh_repo.get_pull(number=pr_number)

    LOGGER.info(f'CONTEXT: PR #{pr_number}. USER: {pr.user.login}. ALL PR LABELS: {list(pr.get_labels())}')

    if not gh_repo.has_in_collaborators(pr.user.login):
        LOGGER.info(f'USER {pr.user.login} IS NOT A COLLABORATOR')

        # for/else: the else branch runs only if the loop completes without a break,
        # i.e. the label is not on the PR yet
        for label in pr.get_labels():
            if label.name == EXTERNAL_PR_LABEL_NAME:
                LOGGER.info(f'THE PR ALREADY HAS THE "{EXTERNAL_PR_LABEL_NAME}" LABEL')
                break
        else:
            pr.add_to_labels(EXTERNAL_PR_LABEL_NAME)
            LOGGER.info(f'THE "{EXTERNAL_PR_LABEL_NAME}" LABEL WAS ADDED TO THE PR')
    else:
        LOGGER.info(
            f'USER {pr.user.login} IS A COLLABORATOR, NO NEED TO ADD THE "{EXTERNAL_PR_LABEL_NAME}" LABEL'
        )

    github.close()
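For reference, a dry-run invocation of the labeller might look like the sketch below. The repository slug and PR number are placeholders, and GITHUB_TOKEN is assumed to hold a token allowed to read collaborators and edit labels (inferred from the API calls above, not stated in the commit):

    # Hypothetical invocation; values are placeholders, not part of the commit
    import os
    import subprocess

    env = dict(os.environ, GITHUB_TOKEN='<token-with-repo-access>')  # assumed scope
    subprocess.run(
        ['python', '.github/scripts/external_pr_labeller.py',
         '--repository-name', 'OWNER/REPOSITORY',
         '--pr-number', '1'],
        env=env,
        check=True,
    )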
Empty file.
20 changes: 20 additions & 0 deletions .github/scripts/workflow_rerun/argument_parser.py
@@ -0,0 +1,20 @@
import argparse
from pathlib import Path


def get_arguments() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--repository-name',
                        type=str,
                        required=True,
                        help='Repository name in the OWNER/REPOSITORY format')
    parser.add_argument('--run-id',
                        type=int,
                        required=True,
                        help='Workflow Run ID')
    parser.add_argument('--errors-to-look-for-file',
                        type=str,
                        required=False,
                        help='.json file with the errors to look for in logs',
                        default=Path(__file__).resolve().parent.joinpath('errors_to_look_for.json'))
    return parser.parse_args()
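A quick illustration of what the parser yields (simulated argv; the values are placeholders). Note in particular that argparse maps `--errors-to-look-for-file` to the attribute `errors_to_look_for_file`, which is how `rerunner.py` must refer to it:

    import sys
    from workflow_rerun.argument_parser import get_arguments

    sys.argv = ['rerunner.py', '-r', 'OWNER/REPOSITORY', '--run-id', '123456']
    args = get_arguments()
    assert args.repository_name == 'OWNER/REPOSITORY'
    assert args.run_id == 123456
    # the default is a Path pointing next to argument_parser.py
    assert args.errors_to_look_for_file.name == 'errors_to_look_for.json'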
17 changes: 17 additions & 0 deletions .github/scripts/workflow_rerun/constants.py
@@ -0,0 +1,17 @@
import logging
import os


GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')


def init_logger():
    LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
    logging.basicConfig(level=LOGLEVEL,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        datefmt='%m-%d-%Y %H:%M:%S')


init_logger()

LOGGER = logging.getLogger('rerunner')
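Note that importing this module runs `init_logger()` as a side effect, so every script in the package shares one logging setup. A small sketch of overriding the level (assuming the variable is set before the first import):

    import os
    os.environ['LOGLEVEL'] = 'DEBUG'  # must happen before workflow_rerun.constants is first imported
    from workflow_rerun.constants import LOGGER
    LOGGER.debug('visible because basicConfig ran at DEBUG level')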
42 changes: 42 additions & 0 deletions .github/scripts/workflow_rerun/errors_to_look_for.json
@@ -0,0 +1,42 @@
[
    {
        "error_text": "This is a problem related to network connectivity",
        "ticket": 135929
    },
    {
        "error_text": "Unable to make request",
        "ticket": 135715
    },
    {
        "error_text": "GnuTLS recv error",
        "ticket": 131918
    },
    {
        "error_text": "Connection was reset",
        "ticket": 131818
    },
    {
        "error_text": "Failed to connect to github.com",
        "ticket": 131657
    },
    {
        "error_text": "Could not resolve host: github.com",
        "ticket": 131546
    },
    {
        "error_text": "retrieving gpg key timed out",
        "ticket": 131538
    },
    {
        "error_text": "Retry limit has been reached for chunk",
        "ticket": 131537
    },
    {
        "error_text": "fatal error: downloading",
        "ticket": 131424
    },
    {
        "error_text": "Failure when receiving data from the peer",
        "ticket": 137121
    }
]
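Matching against these entries is not literal: both the `error_text` values and the log lines are normalized first (see `_clean_up_string` in `log_analyzer.py` below). A minimal sketch of that behavior:

    import re

    def clean(string: str) -> str:
        # same normalization as LogAnalyzer._clean_up_string
        return re.sub(r'[^A-Za-z0-9]+', ' ', string).lower().strip()

    error = 'Could not resolve host: github.com'
    log_line = 'fatal: Could not resolve host: github.com!\n'
    assert clean(error) in clean(log_line)  # 'could not resolve host github com'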
132 changes: 132 additions & 0 deletions .github/scripts/workflow_rerun/log_analyzer.py
@@ -0,0 +1,132 @@
import json
import re
import tempfile
from pathlib import Path
from typing import TypedDict
from zipfile import ZipFile

from workflow_rerun.constants import LOGGER


class LogFile(TypedDict):
    file_name: str
    path: Path


class ErrorData(TypedDict):
    error_text: str
    ticket: int


class LogAnalyzer:
    def __init__(self,
                 path_to_log_archive: Path,
                 path_to_errors_file: Path) -> None:
        self._path_to_log_archive = path_to_log_archive
        self._path_to_errors_file = path_to_errors_file

        self._errors_to_look_for: list[ErrorData] = []
        self._collect_errors_to_look_for()

        # mkdtemp keeps the directory alive for the lifetime of the process;
        # tempfile.TemporaryDirectory().name would hand back a path whose
        # directory is removed as soon as the object is garbage collected
        self._log_dir = tempfile.mkdtemp()

        self._log_files: list[LogFile] = []
        self._collect_log_files()

        all_txt_log_files_pretty = '\n'.join(map(lambda item: str(item['path']), self._log_files))
        LOGGER.info(f'ALL .txt LOG FILES: \n{all_txt_log_files_pretty}')

        self.found_matching_error = False

    def _collect_errors_to_look_for(self) -> None:
        with open(file=self._path_to_errors_file,
                  mode='r',
                  encoding='utf-8') as errors_file:
            errors_data = json.load(errors_file)
            for error_data in errors_data:
                self._errors_to_look_for.append(
                    ErrorData(error_text=error_data['error_text'],
                              ticket=error_data['ticket'])
                )

    def _collect_log_files(self) -> None:
        """
        Collects the .txt log files from the log archive.

        The GitHub Actions pipeline logs archive is expected to have the following structure:
            > Job_name_0
                > step_name_0.txt
                > step_name_1.txt
                ...
            > Job_name_1
                > step_name_0.txt
                > step_name_1.txt
                ...
            > Job_name_2
                ...
            ...
        Only the `*.txt` files are analyzed
        """
        with ZipFile(file=self._path_to_log_archive,
                     mode='r') as zip_file:
            zip_file.extractall(self._log_dir)

        for _file in Path(self._log_dir).iterdir():
            if _file.is_dir():
                for log_file in _file.iterdir():
                    self._log_files.append(LogFile(file_name=log_file.name,
                                                   path=log_file.resolve()))

    def _is_error_in_log(self,
                         error_to_look_for: str,
                         log_file_path: Path) -> bool:
        """
        Searches for the error in the provided log
        """
        error_to_look_for = self._clean_up_string(error_to_look_for)

        with open(file=log_file_path,
                  mode='r',
                  encoding='utf-8') as log_file:
            for line in log_file:
                if error_to_look_for in self._clean_up_string(line):
                    return True
        return False

    @staticmethod
    def _clean_up_string(string: str) -> str:
        """
        Replaces special characters with spaces, strips leading and trailing
        whitespace, and lowercases the string: e.g.
        "Could not resolve host: github.com" becomes "could not resolve host github com".
        This cleanup is applied both to the errors to look for and to the log
        lines themselves so that the two can be matched reliably
        """
        return re.sub(r'[^A-Za-z0-9]+', ' ', string).lower().strip()

    def analyze(self) -> None:
        """
        Iterates over the known errors and tries to find them in the collected log files
        """
        for error in self._errors_to_look_for:

            LOGGER.info(f'LOOKING FOR "{error["error_text"]}" ERROR...')

            for log_file in self._log_files:
                if self._is_error_in_log(error_to_look_for=error['error_text'],
                                         log_file_path=log_file['path']):
                    LOGGER.info(f'FOUND "{error["error_text"]}" ERROR IN {log_file["path"]}. TICKET: {error["ticket"]}')
                    self.found_matching_error = True
                    return


if __name__ == '__main__':
    # Usage example
    log_analyzer = LogAnalyzer(path_to_log_archive=Path('/tmp/logs/log.zip'),
                               path_to_errors_file=Path('/tmp/errors_to_look_for.json'))
    log_analyzer.analyze()
    if log_analyzer.found_matching_error:
        print('found matching error, see logs above')
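A self-contained smoke test of the analyzer might look like this sketch (all paths and contents are made up; the zip layout mirrors the Job_name/step_name.txt structure documented in `_collect_log_files`):

    import json
    import tempfile
    from pathlib import Path
    from zipfile import ZipFile

    from workflow_rerun.log_analyzer import LogAnalyzer

    workdir = Path(tempfile.mkdtemp())

    errors_file = workdir / 'errors.json'
    errors_file.write_text(json.dumps([{'error_text': 'Connection was reset', 'ticket': 131818}]))

    archive = workdir / 'log.zip'
    with ZipFile(archive, 'w') as zf:
        zf.writestr('Job_name_0/step_name_0.txt', 'error: connection was reset by peer\n')

    analyzer = LogAnalyzer(path_to_log_archive=archive, path_to_errors_file=errors_file)
    analyzer.analyze()
    assert analyzer.found_matching_error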
21 changes: 21 additions & 0 deletions .github/scripts/workflow_rerun/log_collector.py
@@ -0,0 +1,21 @@
from pathlib import Path

import requests
from github.WorkflowRun import WorkflowRun
from workflow_rerun.constants import GITHUB_TOKEN, LOGGER


def collect_logs_for_run(run: WorkflowRun,
                         log_archive_path: Path) -> Path:
    """
    Collects the log archive for a pipeline run
    """
    with open(file=log_archive_path,
              mode='wb') as log_archive:
        LOGGER.info(f'STARTED LOG COLLECTION FOR {run.id} IN {log_archive_path}')
        # PyGitHub does not expose the "/repos/{owner}/{repo}/actions/runs/{run_id}/logs"
        # endpoint, so we have to use requests
        log_archive.write(requests.get(url=run.logs_url,
                                       headers={'Authorization': f'Bearer {GITHUB_TOKEN}'}).content)
        LOGGER.info(f'COLLECTED LOGS FOR {run.id} IN {log_archive_path}')

    return log_archive_path
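The `/logs` endpoint answers with a redirect to a short-lived archive URL, which `requests` follows by default. A slightly hardened variant of the download (a sketch, not the committed code) would add a timeout and an HTTP status check:

    import requests

    def download_logs(logs_url: str, token: str, dest: str, timeout: float = 60.0) -> None:
        response = requests.get(url=logs_url,
                                headers={'Authorization': f'Bearer {token}'},
                                timeout=timeout)  # avoid hanging on a stalled connection
        response.raise_for_status()  # surface 4xx/5xx instead of writing an error body to disk
        with open(dest, mode='wb') as log_archive:
            log_archive.write(response.content)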
53 changes: 53 additions & 0 deletions .github/scripts/workflow_rerun/rerunner.py
@@ -0,0 +1,53 @@
import sys
import tempfile
from pathlib import Path

from github import Github, Auth
from workflow_rerun.argument_parser import get_arguments
from workflow_rerun.constants import GITHUB_TOKEN, LOGGER
from workflow_rerun.log_analyzer import LogAnalyzer
from workflow_rerun.log_collector import collect_logs_for_run

if __name__ == '__main__':

    args = get_arguments()
    run_id = args.run_id
    repository_name = args.repository_name

    github = Github(auth=Auth.Token(token=GITHUB_TOKEN))
    gh_repo = github.get_repo(full_name_or_id=repository_name)
    run = gh_repo.get_workflow_run(id_=run_id)

    LOGGER.info(f'CHECKING IF RERUN IS NEEDED FOR {run.html_url} RUN IN {repository_name}.')

    # Check if the run has already been retriggered;
    # we do not want to fall into a retrigger loop
    if run.run_attempt > 1:
        LOGGER.info(f'THERE ARE {run.run_attempt} ATTEMPTS ALREADY. NOT CHECKING LOGS AND NOT RETRIGGERING. EXITING')
        sys.exit(0)

    log_archive_path = Path(tempfile.NamedTemporaryFile(suffix='.zip').name)

    collect_logs_for_run(
        run=run,
        log_archive_path=log_archive_path,
    )

    log_analyzer = LogAnalyzer(
        path_to_log_archive=log_archive_path,
        # argparse stores "--errors-to-look-for-file" under this attribute name
        path_to_errors_file=args.errors_to_look_for_file,
    )
    log_analyzer.analyze()

    if log_analyzer.found_matching_error:
        LOGGER.info(f'FOUND MATCHING ERROR, RETRIGGERING {run.html_url}')
        status = run.rerun()
        if status:
            LOGGER.info(f'RUN RETRIGGERED SUCCESSFULLY: {run.html_url}')
        else:
            LOGGER.info('RUN WAS NOT RETRIGGERED, SEE ABOVE')

        # "status" is True if the rerun was triggered successfully, so
        # sys.exit(not status) exits with code 0 on success and 1 on failure
        sys.exit(not status)
    else:
        LOGGER.info('NO ERROR WAS FOUND, NOT RETRIGGERING')
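The exit-code contract at the end is easy to misread; `not status` flips the boolean so that the usual shell conventions hold:

    assert int(not True) == 0   # rerun succeeded -> process exits 0
    assert int(not False) == 1  # rerun failed    -> process exits 1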
Empty file.
Git LFS file not shown
Git LFS file not shown