mirror of
https://github.com/tiennm99/is-a-dev.git
synced 2026-05-18 15:26:00 +00:00
61 lines
1.8 KiB
Python
61 lines
1.8 KiB
Python
import os
|
|
import json
|
|
import requests
|
|
import concurrent.futures
|
|
|
|
# The current working directory is treated as the repository root.
repository_directory = os.getcwd()

# Folder that is walked for files to validate.
domains_directory = os.path.join(repository_directory, "domains")
|
|
|
|
def has_url_field(file_path):
    """Return the ``URL`` entry of the ``record`` object stored in a JSON file.

    Args:
        file_path: Path of the JSON file to inspect.

    Returns:
        The value stored under ``record["URL"]``, or ``None`` when the
        document is not a JSON object, its ``record`` entry is missing or is
        not an object, or that object has no ``URL`` key.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid JSON
            (``json.JSONDecodeError``) or not valid UTF-8
            (``UnicodeDecodeError``).
    """
    # Decode as UTF-8 explicitly instead of relying on the platform default.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    # Only JSON objects can be looked up by key. Without these guards a
    # top-level list raised AttributeError, and a string record containing
    # the text "URL" passed the ``in`` test and then failed on indexing.
    if not isinstance(data, dict):
        return None
    record = data.get("record")
    if not isinstance(record, dict):
        return None
    return record.get("URL")
|
|
|
|
def is_url_reachable(url: str, timeout: float = 10.0):
    """Report whether a HEAD request to ``url`` gets a 1xx, 2xx or 3xx answer.

    Args:
        url: Address to probe; redirects are followed.
        timeout: Seconds to wait on the server before giving up. Requests
            applies no timeout unless one is passed, so without it an
            unresponsive host could block the calling thread indefinitely.

    Returns:
        ``True`` if the final response status is in the 1xx, 2xx or 3xx
        range; ``False`` for any other status or when the request fails with
        a ``requests`` exception (a timeout included).
    """
    try:
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.exceptions.RequestException:
        return False
    # Integer division maps e.g. 204 -> 2 and 301 -> 3.
    return response.status_code // 100 in (1, 2, 3)
|
|
|
|
# Result buckets filled in by handle_url_validation; each bucket maps a
# file path to the URL found in that file.
urls_data = {bucket: {} for bucket in ("valid", "invalid", "non-http")}
|
|
|
|
def handle_url_validation(file_path):
    """Classify the URL of one file and record the outcome in ``urls_data``.

    Nothing happens when ``has_url_field`` yields a falsy value. Otherwise
    the URL is stored, keyed by ``file_path``, under ``"non-http"``,
    ``"valid"`` or ``"invalid"``, and a matching message is printed.
    """
    url: str = has_url_field(file_path)
    if not url:
        return

    # URLs without an http(s) scheme are recorded but never requested.
    if not url.startswith(("http://", "https://")):
        urls_data["non-http"][file_path] = url
        print(f"URL '{url}' in file '{file_path}' is neither HTTP nor HTTPS.")
        return

    if is_url_reachable(url):
        urls_data["valid"][file_path] = url
        print(f"URL '{url}' in file '{file_path}' is reachable.")
        return

    urls_data["invalid"][file_path] = url
    print(f"URL '{url}' in file '{file_path}' is not reachable.")
|
|
|
|
# Upper bound on the number of URL checks running at the same time.
max_threads = 20

# Collect every file below the domains directory before queueing any work.
file_paths = []
for root, _, files in os.walk(domains_directory):
    file_paths.extend(os.path.join(root, filename) for filename in files)

with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
    # Remember which file each future belongs to so failures can be named.
    pending = {}
    for file_path in file_paths:
        pending[executor.submit(handle_url_validation, file_path)] = file_path

    # An exception raised inside a worker is stored on its future and stays
    # invisible unless it is asked for, so report each failure instead of
    # silently dropping the file from the results.
    for future in concurrent.futures.as_completed(pending):
        error = future.exception()
        if error is not None:
            print(f"Could not validate file '{pending[future]}': {error!r}")

# Leaving the ``with`` block waits for all workers, so urls_data is complete.
result_file_path = os.path.join(repository_directory, "url-validation-result.json")
with open(result_file_path, "w") as result_file:
    json.dump(urls_data, result_file, indent=4)

print(f"Results saved to {result_file_path}")
|