import os
import json
import requests
import concurrent.futures

# All paths are resolved against the current working directory
# (presumably the script is meant to be launched from the repository root).
repository_directory = os.getcwd()
# Directory whose files are scanned (recursively) for URL records.
domains_directory = os.path.join(repository_directory, "domains")

def has_url_field(file_path):
  """Return the ``record["URL"]`` value stored in a JSON file, if any.

  Args:
    file_path: Path of the file to read.

  Returns:
    The value of the ``URL`` key inside the top-level ``record`` object, or
    ``None`` when the file is not valid UTF-8 JSON, its top level is not a
    JSON object, or it has no ``record`` object containing a ``URL`` key.

  Raises:
    OSError: If the file cannot be opened or read.
  """
  try:
    with open(file_path, "r", encoding="utf-8") as file:
      data = json.load(file)
  except (json.JSONDecodeError, UnicodeDecodeError):
    # Not a JSON document (or not UTF-8 text), so it cannot carry a URL field.
    return None

  # Valid JSON may be a list, string, number, ... rather than an object.
  if not isinstance(data, dict):
    return None

  record = data.get("record")
  # Require a real mapping: on a string, `"URL" in record` would be a
  # substring test and the subsequent lookup would raise.
  if isinstance(record, dict) and "URL" in record:
    return record["URL"]
  return None

def is_url_reachable(url: str, timeout: float = 10.0) -> bool:
  """Report whether a HEAD request to ``url`` gets a non-error response.

  Args:
    url: The URL to probe.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    ``True`` if the response (after following redirects) has a 1xx, 2xx or
    3xx status code; ``False`` for any other status or when the request
    fails with a ``requests`` exception (connection error, timeout, ...).
  """
  try:
    # Without an explicit timeout, requests waits indefinitely on an
    # unresponsive server and the calling thread never returns.
    response = requests.head(url, allow_redirects=True, timeout=timeout)
  except requests.exceptions.RequestException:
    return False
  return 100 <= response.status_code < 400

# Result buckets, one per validation outcome; each bucket maps a file path
# to the URL found in that file.
urls_data = {bucket: {} for bucket in ("valid", "invalid", "non-http")}

def handle_url_validation(file_path):
  """Classify the URL stored in one file and record the outcome.

  The URL is read with ``has_url_field``; files without one are ignored.
  HTTP(S) URLs are probed with ``is_url_reachable`` and stored under
  ``urls_data["valid"]`` or ``urls_data["invalid"]``; every other value is
  stored under ``urls_data["non-http"]``. Each outcome is also printed.

  Args:
    file_path: Path of the file to inspect; also used as the key under
      which the URL is stored in ``urls_data``.
  """
  url = has_url_field(file_path)
  if not url:
    return

  # URI schemes are case-insensitive, so "HTTP://..." is still an HTTP URL.
  # A non-string value (e.g. a list) cannot be probed and is reported as
  # non-HTTP rather than raising inside the worker.
  is_http = isinstance(url, str) and url.lower().startswith(("http://", "https://"))
  if not is_http:
    urls_data["non-http"][file_path] = url
    print(f"URL '{url}' in file '{file_path}' is neither HTTP nor HTTPS.")
    return

  if is_url_reachable(url):
    urls_data["valid"][file_path] = url
    print(f"URL '{url}' in file '{file_path}' is reachable.")
  else:
    urls_data["invalid"][file_path] = url
    print(f"URL '{url}' in file '{file_path}' is not reachable.")

# Upper bound on concurrent URL checks.
max_threads = 20

# Gather every file under the domains directory before starting the pool,
# so worker threads exist only while validation is actually running.
file_paths = []
for root, _, files in os.walk(domains_directory):
  file_paths.extend(os.path.join(root, filename) for filename in files)

with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
  pending = {
    executor.submit(handle_url_validation, file_path): file_path
    for file_path in file_paths
  }
  # An exception raised in a worker stays hidden inside its future unless it
  # is retrieved, so report each failure instead of dropping the file silently.
  for future in concurrent.futures.as_completed(pending):
    error = future.exception()
    if error is not None:
      print(f"Validation of file '{pending[future]}' failed: {error!r}")

# Persist the collected results next to the repository root.
result_file_path = os.path.join(repository_directory, "url-validation-result.json")
with open(result_file_path, "w", encoding="utf-8") as result_file:
  json.dump(urls_data, result_file, indent=4)

print(f"Results saved to {result_file_path}")