Merge pull request #8145 from VaibhavSys/main

CI: URL Validation
This commit is contained in:
Vaibhav
2023-09-24 14:49:20 +05:30
committed by GitHub
2 changed files with 86 additions and 0 deletions
+26
View File
@@ -0,0 +1,26 @@
# Weekly (and on-demand) check that the URLs found under domains/ are reachable.
# The result is published as a build artifact.
name: URL Validation

on:
  schedule:
    - cron: '0 0 * * 1' # At 00:00 on Monday
  workflow_dispatch:

jobs:
  validation:
    runs-on: ubuntu-latest
    steps:
      # No explicit `ref`: this workflow has no pull_request trigger, so there
      # is no PR head SHA to pin and the event's default ref is checked out.
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - name: Set Up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - run: pip install requests
      - run: python tests/url-validation.py
      # upload-artifact v3 has been retired by GitHub; v4 accepts the same
      # name / path / retention-days inputs used here.
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          # NOTE(review): the name is misspelled ("Validiation"); left as-is in
          # case anything fetches the artifact by name.
          name: URL Validiation Result
          path: url-validation-result.json
          retention-days: 7
+60
View File
@@ -0,0 +1,60 @@
import os
import json
import requests
import concurrent.futures
# Base directory for all paths used below. It is the current working
# directory, so the script is assumed to be launched from the repository root.
repository_directory = os.getcwd()
# Directory that is walked recursively for the files to validate.
domains_directory = os.path.join(repository_directory, "domains")
def has_url_field(file_path):
    """Return the ``record.URL`` value stored in a JSON file, if there is one.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The string found at ``data["record"]["URL"]``, or ``None`` when the
        file cannot be read, is not valid JSON, does not have that shape, or
        the value is not a string.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so non-JSON or binary files are simply treated as having no URL.
        return None

    # Guard against JSON documents whose top level (or "record") is not an
    # object; indexing those would raise instead of reporting "no URL".
    record = data.get("record") if isinstance(data, dict) else None
    if not isinstance(record, dict):
        return None

    url = record.get("URL")
    return url if isinstance(url, str) else None
def is_url_reachable(url: str, timeout: float = 10):
    """Report whether a HEAD request to ``url`` ends in a non-error status.

    Args:
        url: The HTTP(S) URL to probe. Redirects are followed.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled server would block its worker thread
            indefinitely.

    Returns:
        ``True`` when the final response status is 1xx, 2xx or 3xx; ``False``
        for 4xx/5xx responses and for any request failure (connection error,
        timeout, invalid URL, ...).
    """
    try:
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.exceptions.RequestException:
        return False
    return 100 <= response.status_code < 400
# Validation results, grouped by outcome. Each bucket maps a file path to the
# URL found in that file.
urls_data = {bucket: {} for bucket in ("valid", "invalid", "non-http")}
def handle_url_validation(file_path):
    """Classify the URL of one file and record the outcome in ``urls_data``.

    Files without a (non-empty) URL are ignored. Otherwise the URL is stored
    under ``"non-http"``, ``"valid"`` or ``"invalid"`` keyed by ``file_path``,
    and a one-line summary is printed.
    """
    found = has_url_field(file_path)
    if not found:
        return

    # Only HTTP(S) URLs can be probed; anything else is just recorded.
    if not found.startswith(("http://", "https://")):
        urls_data["non-http"][file_path] = found
        print(f"URL '{found}' in file '{file_path}' is neither HTTP nor HTTPS.")
        return

    if is_url_reachable(found):
        urls_data["valid"][file_path] = found
        print(f"URL '{found}' in file '{file_path}' is reachable.")
        return

    urls_data["invalid"][file_path] = found
    print(f"URL '{found}' in file '{file_path}' is not reachable.")
# Upper bound on concurrent URL checks.
max_threads = 20

# Gather every file below the domains directory before starting the pool.
file_paths = []
for root, _, files in os.walk(domains_directory):
    for filename in files:
        file_paths.append(os.path.join(root, filename))

with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
    # Keep the futures so that a worker failure is reported instead of being
    # dropped silently along with the discarded Future object.
    pending = {
        executor.submit(handle_url_validation, file_path): file_path
        for file_path in file_paths
    }
    for future in concurrent.futures.as_completed(pending):
        error = future.exception()
        if error is not None:
            print(f"Validation of file '{pending[future]}' failed: {error!r}")

# Persist the collected results; the pool has fully drained at this point.
result_file_path = os.path.join(repository_directory, "url-validation-result.json")
with open(result_file_path, "w", encoding="utf-8") as result_file:
    json.dump(urls_data, result_file, indent=4)
print(f"Results saved to {result_file_path}")