Hackerss.com

hackerss
hackerss

Posted on

Java: Scrape a website using jsoup to obtain all images


import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Command-line example that fetches a web page with jsoup and prints the
 * absolute URL of every {@code <img>} whose {@code src} looks like a PNG,
 * JPEG or GIF file.
 *
 * <p>Usage: {@code java ScrapImages [url]}. When no URL is supplied the
 * default page ({@value #DEFAULT_URL}) is used.
 */
public class ScrapImages {

   /** Page that is fetched when no URL is passed on the command line. */
   private static final String DEFAULT_URL = "https://www.javadoc.io/";

   /**
    * jsoup CSS query selecting {@code img} elements whose {@code src} attribute
    * contains ".png", ".jpg", ".jpeg" or ".gif" (case-insensitive regex match).
    */
   private static final String IMAGE_SELECTOR = "img[src~=(?i)\\.(png|jpe?g|gif)]";

   /**
    * Fetches the page and prints one image URL per line to standard output.
    *
    * @param args optional; {@code args[0]} is the URL of the page to inspect.
    *             A missing or blank argument falls back to {@link #DEFAULT_URL}.
    * @throws IOException if the page cannot be fetched
    */
   public static void main(String[] args) throws IOException {
      String url = DEFAULT_URL;
      if (args != null && args.length > 0 && !args[0].trim().isEmpty()) {
         url = args[0].trim();
      }

      // Fetching through Jsoup.connect gives the document a base URI, which
      // is what allows relative src values to be resolved to absolute URLs.
      Document doc = Jsoup.connect(url).get();

      for (String imageURL : collectImageUrls(doc)) {
         System.out.println(imageURL);
      }
   }

   /**
    * Collects the absolute URLs of all matching images in document order.
    *
    * @param doc parsed HTML document to search
    * @return list of absolute image URLs; empty if the page has no matching images
    */
   private static List<String> collectImageUrls(Document doc) {
      Elements images = doc.select(IMAGE_SELECTOR);
      List<String> imageURLs = new ArrayList<String>();

      for (Element image : images) {
         String imageURL = image.absUrl("src");
         // absUrl yields an empty string when the src value cannot be turned
         // into an absolute URL; skip those rather than printing blank lines.
         if (!imageURL.isEmpty()) {
            imageURLs.add(imageURL);
         }
      }
      return imageURLs;
   }
}
Enter fullscreen mode Exit fullscreen mode

Top comments (0)