import requests
from bs4 import BeautifulSoup

def get_links(url):
    """
    Scrape a website to obtain all links and images.

    Parameters
    ----------
    url : str
        The website to scrape.

    Returns
    -------
    list
        A list of all link and image URLs.
    """
    # get the html of the website
    html = requests.get(url).text
    # create a BeautifulSoup object
    soup = BeautifulSoup(html, 'html.parser')
    # find all the links and images on the page
    links = soup.find_all('a')
    images = soup.find_all('img')
    # create a list to store all the links and images
    all_links = []
    # loop through the links and images; skip tags missing the attribute
    for link in links:
        href = link.get('href')
        if href:
            all_links.append(href)
    for image in images:
        src = image.get('src')
        if src:
            all_links.append(src)
    # return the list of links and images
    return all_links
# example
url = 'https://www.python.org/'
links = get_links(url)
# output (truncated; the exact list depends on the live page)
print(links)  # e.g. ['/about/', '/downloads/', '/doc/', '/community/', '/events/', ...]
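Most of the paths returned this way are relative (e.g. '/about/'). If you need absolute URLs, the standard library's urllib.parse.urljoin can resolve each path against the page it came from; here is a minimal sketch (the variable names are just illustrative):

from urllib.parse import urljoin

url = 'https://www.python.org/'
links = get_links(url)
# resolve every relative path against the base URL it was scraped from
absolute_links = [urljoin(url, link) for link in links]
print(absolute_links)  # e.g. ['https://www.python.org/about/', ...]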