2017-12-24 01:24:15 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import httplib2
|
2017-12-29 05:44:24 +01:00
|
|
|
import re
|
2017-12-24 01:24:15 +01:00
|
|
|
import socket
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
|
|
def parse_links(filename):
    """Return the list of URLs found in a text file.

    Args:
        filename: Path of the text file to scan.

    Returns:
        A list of URL strings in the order they appear in the file, with
        every ')' character removed from each match.
    """
    # Decode explicitly so the result does not depend on the platform locale.
    with open(filename, encoding='utf-8') as fp:
        data = fp.read()
    # Raw string: the pattern contains \( and \), which are invalid escape
    # sequences in an ordinary string literal.
    raw_links = re.findall(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        data)
    # The pattern also accepts ')', so a match can swallow the closing
    # parenthesis of a Markdown link "[text](url)"; drop it here.
    return [raw_link.replace(')', '') for raw_link in raw_links]
|
|
|
|
|
2018-01-28 21:48:53 +01:00
|
|
|
|
2017-12-24 01:24:15 +01:00
|
|
|
def validate_links(links):
    """Send a HEAD request to each link and collect the ones that fail.

    Args:
        links: List of URL strings to check.

    Returns:
        A list of error strings, one per failing link:
        "<status>: <link>" for HTTP statuses of 400 and above,
        "TMO: <link>" for timeouts,
        "SOC: <error> : <link>" for other socket errors, and
        "ERR: <error> : <link>" for errors raised by httplib2 itself.
    """
    print('Validating {} links...'.format(len(links)))
    errors = []
    for link in links:
        # Certificate validation is disabled and each request is given a
        # 5 second timeout.
        h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5)
        try:
            resp = h.request(link, 'HEAD')
            code = int(resp[0]['status'])
            # Client errors start at 400 and server errors at 500.
            if code >= 400:
                errors.append('{}: {}'.format(code, link))
        except (TimeoutError, socket.timeout):
            # socket.timeout only became an alias of TimeoutError in
            # Python 3.10; name both so timeouts are reported as "TMO"
            # on older interpreters too. Must precede socket.error.
            errors.append("TMO: " + link)
        except socket.error as socketerror:
            errors.append("SOC: {} : {}".format(socketerror, link))
        except httplib2.HttpLib2Error as httperror:
            # Record httplib2 failures instead of letting one bad link
            # abort the whole run.
            errors.append("ERR: {} : {}".format(httperror, link))
    return errors
|
|
|
|
|
2018-01-28 21:48:53 +01:00
|
|
|
|
2017-12-24 01:24:15 +01:00
|
|
|
if __name__ == "__main__":
    # The file to scan must be given as the first command-line argument.
    if len(sys.argv) < 2:
        print("No .md file passed")
        sys.exit(1)
    failures = validate_links(parse_links(sys.argv[1]))
    # Print every failing link, then signal failure via the exit status.
    if failures:
        for failure in failures:
            print(failure)
        sys.exit(1)
|