Hackerss.com is a community of amazing hackers

Hackerss is a community for developers, data scientists, ethical hackers, hardware enthusiasts, or anyone who wants to learn / share their knowledge of any aspect of digital technology.

Create account Log in

Posted on

Python: Scrape a website to obtain all links and images

import requests
from bs4 import BeautifulSoup

def get_links(url):
    """Scrape a website to obtain all links and images.

    Parameters
    ----------
    url : str
        The website to scrape.

    Returns
    -------
    list of str
        The ``href`` target of every anchor tag followed by the ``src``
        of every image tag found on the page.
    """
    # Get the HTML of the website.
    html = requests.get(url).text

    # Create a BeautifulSoup object.
    soup = BeautifulSoup(html, 'html.parser')

    # Find all the links and images on the page.
    links = soup.find_all('a')
    images = soup.find_all('img')

    # Collect the URLs; Tag.get returns None when the attribute is
    # absent (e.g. an <a> with no href), which we skip.
    all_links = []

    for link in links:
        href = link.get('href')
        if href is not None:
            all_links.append(href)

    for image in images:
        src = image.get('src')
        if src is not None:
            all_links.append(src)

    # Return the list of links and images.
    return all_links

# Demo: guard the network call so importing this module has no side effects.
if __name__ == '__main__':
    url = 'https://www.python.org/'
    links = get_links(url)
    print(links)  # e.g. ['/about/', '/downloads/', '/doc/', '/community/', ...]
Enter fullscreen mode Exit fullscreen mode

Top comments (0)