From cfaef23a46cdeb89622c28393a0c88947008ba11 Mon Sep 17 00:00:00 2001 From: Vaibhav Dhiman Date: Sat, 23 Sep 2023 23:03:49 +0530 Subject: [PATCH 1/4] feat(ci): add url validiation --- .github/workflows/url-validation.yml | 26 ++++++++++++ tests/url-validation.py | 60 ++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 .github/workflows/url-validation.yml create mode 100644 tests/url-validation.py diff --git a/.github/workflows/url-validation.yml b/.github/workflows/url-validation.yml new file mode 100644 index 000000000..09fae9cc5 --- /dev/null +++ b/.github/workflows/url-validation.yml @@ -0,0 +1,26 @@ +name: URL Validation +on: + schedule: + - cron: '0 0 * * 1' # At 00:00 on Monday + workflow_dispatch: + +jobs: + validation: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + - name: Setup Up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - run: pip install requests + - run: python tests/url-validation.py + - name: Upload Artifact + uses: actions/upload-artifact@v3 + with: + name: URL Validiation Result + path: url-validiation-result.json + retention-days: 6 diff --git a/tests/url-validation.py b/tests/url-validation.py new file mode 100644 index 000000000..25d2f7b70 --- /dev/null +++ b/tests/url-validation.py @@ -0,0 +1,60 @@ +import os +import json +import requests +import concurrent.futures + +repository_directory = os.getcwd() +domains_directory = os.path.join(repository_directory, "domains") + +def has_url_field(file_path): + with open(file_path, "r") as file: + data = json.load(file) + record = data.get("record") + if record and "URL" in record: + return record["URL"] + return None + +def is_url_reachable(url: str): + try: + response = requests.head(url, allow_redirects=True) + return response.status_code // 100 in [1, 2, 3] # Check if status code is in the 1xx or 2xx or 3xx range (success) + except requests.exceptions.RequestException: + return False + +urls_data = { + "valid": {}, + "invalid": {}, + "non-http": {} +} + +def handle_url_validation(file_path): + url: str = has_url_field(file_path) + if url: + if url.startswith("http://") or url.startswith("https://"): + if is_url_reachable(url): + urls_data["valid"][file_path] = url + print(f"URL '{url}' in file '{file_path}' is reachable.") + else: + urls_data["invalid"][file_path] = url + print(f"URL '{url}' in file '{file_path}' is not reachable.") + else: + urls_data["non-http"][file_path] = url + print(f"URL '{url}' in file '{file_path}' is neither HTTP nor HTTPS.") + +max_threads = 20 + +with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor: + file_paths = [] + + for root, _, files in os.walk(domains_directory): + for filename in files: + file_paths.append(os.path.join(root, filename)) + + for file_path in file_paths: + future = executor.submit(handle_url_validation, file_path) + +result_file_path = os.path.join(repository_directory, "url-validation-result.json") +with open(result_file_path, "w") as result_file: + json.dump(urls_data, result_file, indent=4) + +print(f"Results saved to {result_file_path}") From a69ad0314409a602a194d3335cd744e54b0efc8d Mon Sep 17 00:00:00 2001 From: Vaibhav Dhiman Date: Sat, 23 Sep 2023 23:21:41 +0530 Subject: [PATCH 2/4] fix(ci): syntax error in url validiation --- .github/workflows/url-validation.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/url-validation.yml b/.github/workflows/url-validation.yml index 09fae9cc5..061bb856d 100644 --- a/.github/workflows/url-validation.yml +++ b/.github/workflows/url-validation.yml @@ -19,8 +19,8 @@ jobs: - run: pip install requests - run: python tests/url-validation.py - name: Upload Artifact - uses: actions/upload-artifact@v3 - with: - name: URL Validiation Result - path: url-validiation-result.json - retention-days: 6 + uses: actions/upload-artifact@v3 + with: + name: URL Validiation Result + path: url-validiation-result.json + retention-days: 6 From f86e95694513e6716e31a486d67d06f7548b2500 Mon Sep 17 00:00:00 2001 From: Vaibhav Dhiman Date: Sat, 23 Sep 2023 23:27:03 +0530 Subject: [PATCH 3/4] fix(ci): typo in url validiation artifact --- .github/workflows/url-validation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/url-validation.yml b/.github/workflows/url-validation.yml index 061bb856d..dfa40dcb5 100644 --- a/.github/workflows/url-validation.yml +++ b/.github/workflows/url-validation.yml @@ -22,5 +22,5 @@ jobs: uses: actions/upload-artifact@v3 with: name: URL Validiation Result - path: url-validiation-result.json + path: url-validation-result.json retention-days: 6 From b99ed4151557a726821dbe97862509ffb3a16ec6 Mon Sep 17 00:00:00 2001 From: Vaibhav Date: Sun, 24 Sep 2023 11:49:44 +0530 Subject: [PATCH 4/4] Update .github/workflows/url-validation.yml Co-authored-by: William Harrison --- .github/workflows/url-validation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/url-validation.yml b/.github/workflows/url-validation.yml index dfa40dcb5..9818ba4f7 100644 --- a/.github/workflows/url-validation.yml +++ b/.github/workflows/url-validation.yml @@ -23,4 +23,4 @@ jobs: with: name: URL Validiation Result path: url-validation-result.json - retention-days: 6 + retention-days: 7