mirror of
https://github.com/tiennm99/is-a-dev.git
synced 2026-05-19 07:26:27 +00:00
@@ -0,0 +1,26 @@
|
||||
# Scheduled check that runs tests/url-validation.py against the repository and
# publishes the resulting url-validation-result.json as a build artifact.
name: URL Validation

on:
  schedule:
    - cron: '0 0 * * 1' # At 00:00 on Monday
  workflow_dispatch:

jobs:
  validation:
    runs-on: ubuntu-latest
    steps:
      # No `ref` override: this workflow is only triggered by `schedule` and
      # `workflow_dispatch`, so there is no pull_request payload to read a
      # head SHA from and the default checkout ref is the one that applies.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set Up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - run: pip install requests
      - run: python tests/url-validation.py
      # upload-artifact v3 has been retired by GitHub; v4 is required for the
      # upload step to keep working. The artifact name is left unchanged so
      # anything that fetches it by name is unaffected.
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          name: URL Validiation Result
          path: url-validation-result.json
          retention-days: 7
|
||||
@@ -0,0 +1,60 @@
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
import concurrent.futures
|
||||
|
||||
repository_directory = os.getcwd()
|
||||
domains_directory = os.path.join(repository_directory, "domains")
|
||||
|
||||
def has_url_field(file_path):
    """Return the ``URL`` entry of the ``record`` object in a JSON file.

    Args:
        file_path: Path of the JSON file to inspect.

    Returns:
        The value stored at ``record["URL"]``, or ``None`` when the document
        is not a JSON object, has no non-empty ``record`` object, or that
        object has no ``URL`` key.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 encoded JSON
            (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    # A top-level list/string/number has no ".get"; treat it as "no URL"
    # rather than failing with AttributeError.
    if not isinstance(data, dict):
        return None

    record = data.get("record")
    # Only a JSON object can carry a "URL" key; a list or string that merely
    # contains "URL" must not be indexed with it.
    if isinstance(record, dict) and "URL" in record:
        return record["URL"]
    return None
|
||||
|
||||
def is_url_reachable(url: str, timeout: float = 10.0):
    """Report whether a HEAD request to *url* ends with a non-error status.

    Args:
        url: URL to probe.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` when the final response (redirects are followed) carries a
        1xx, 2xx or 3xx status code; ``False`` for any other status and for
        any ``requests`` failure, including a timeout.
    """
    try:
        # requests applies no timeout by default, so without this argument
        # the call can block indefinitely on an unresponsive host.
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.exceptions.RequestException:
        return False
    return 100 <= response.status_code < 400
|
||||
|
||||
# Result buckets filled in by handle_url_validation: each one maps a file
# path to the URL found in that file.
urls_data = {bucket: {} for bucket in ("valid", "invalid", "non-http")}
|
||||
|
||||
def handle_url_validation(file_path):
    """Classify the URL found in *file_path* and record it in ``urls_data``.

    Files without a (truthy) URL are ignored. Otherwise the URL is stored
    under ``"non-http"`` when it has neither an ``http://`` nor an
    ``https://`` prefix, under ``"valid"`` when ``is_url_reachable`` accepts
    it, and under ``"invalid"`` when it does not. A one-line report is
    printed for every classified URL.

    Args:
        file_path: Path of the file to examine; also used as the result key.
    """
    found_url = has_url_field(file_path)
    if not found_url:
        return

    # Anything that is not plain HTTP(S) is recorded without being probed.
    if not found_url.startswith(("http://", "https://")):
        urls_data["non-http"][file_path] = found_url
        print(f"URL '{found_url}' in file '{file_path}' is neither HTTP nor HTTPS.")
        return

    if is_url_reachable(found_url):
        urls_data["valid"][file_path] = found_url
        print(f"URL '{found_url}' in file '{file_path}' is reachable.")
        return

    urls_data["invalid"][file_path] = found_url
    print(f"URL '{found_url}' in file '{file_path}' is not reachable.")
|
||||
|
||||
# Upper bound on concurrently running validation workers.
max_threads = 20

# Gather every file below the domains directory before scheduling any work.
file_paths = [
    os.path.join(root, filename)
    for root, _, files in os.walk(domains_directory)
    for filename in files
]

with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
    # Keep each future together with its file so a failure can be attributed.
    pending = {
        executor.submit(handle_url_validation, file_path): file_path
        for file_path in file_paths
    }

    for finished in concurrent.futures.as_completed(pending):
        error = finished.exception()
        if error is not None:
            # An exception raised inside a worker is stored on its future and
            # is lost if the future is discarded. Report it, but keep going so
            # one bad file does not abort the whole run.
            print(f"Validation of '{pending[finished]}' failed: {error!r}")

# All workers have finished once the executor block exits, so urls_data is
# complete at this point.
result_file_path = os.path.join(repository_directory, "url-validation-result.json")
with open(result_file_path, "w") as result_file:
    json.dump(urls_data, result_file, indent=4)

print(f"Results saved to {result_file_path}")
|
||||
Reference in New Issue
Block a user