import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeOptions; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.openqa.selenium.support.ui.ExpectedCondition; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; import javax.xml.crypto.Data; import java.io.File; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.SQLException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.*; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.CountDownLatch; public class ScraperThread extends Thread { private String url; private CountDownLatch latch; public ScraperThread(String url, CountDownLatch latch) { this.url = url; this.latch = latch; } public WebDriver driver; private void initializeWebDriver() { System.setProperty("webdriver.chrome.driver", "C:\\chromedriver-win64\\chromedriver.exe"); ChromeOptions options = new ChromeOptions(); options.setBinary("C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe"); options.addArguments("--headless"); options.addArguments("--disable-gpu"); options.addArguments("--remote-allow-origins=*"); options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"); driver = new ChromeDriver(options); } private void closeWebDriver() { if (driver != null) { driver.quit(); } } private void connectToWeb(String queryUrl, int numPeople) { driver.get(queryUrl); WebDriverWait wait = new WebDriverWait(driver, 40); // 40s timeout buffer switch (url) { case "https://booking.escapetravel.mk/": wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector("#hotels-container"))); try { Thread.sleep(10000);} catch (InterruptedException e) { e.printStackTrace(); }//price fetch break; case "https://magelantravel.mk/": wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector("div.sodrzina"))); break; } String pageSource = driver.getPageSource(); System.out.println("Connected to " + queryUrl); Document doc = Jsoup.parse(pageSource); Element parentDiv; Elements childDivs; switch (url) { case "https://booking.escapetravel.mk/": parentDiv = doc.selectFirst("#hotels-container"); if (parentDiv != null) { childDivs = parentDiv.select("a.hotel-item"); for (Element div : childDivs) { String data = div.outerHtml(); Option option = optionParser(data, numPeople); if (option != null) { option.setId(DatabaseUtil.saveOptionToDatabase(option)); scrapeOptionInfo(option); System.out.println("Parsed " + option); } } } else { System.out.println("Parent div not found"); } break; case "https://magelantravel.mk/": parentDiv = doc.selectFirst("div.sodrzina"); if (parentDiv != null) { childDivs = parentDiv.select("div.destinacija"); childDivs.removeIf(div -> div.attr("style").contains("display:none") || div.attr("style").contains("display: none")); System.out.println("Filtered childDivs size: " + childDivs.size()); for (Element div : childDivs) { String data = div.outerHtml(); Option newOption = optionParser(data, numPeople); if (newOption != null) { newOption.setId(DatabaseUtil.saveOptionToDatabase(newOption)); scrapeOptionInfo(newOption); System.out.println("Parsed " + newOption); } } } else { System.out.println("Parent div not found"); } break; default: System.out.println("URL not recognized for parsing."); } } private void scrapeOptionInfo(Option option) { String url = option.getLink(); if(url.contains("magelantravel.mk")) { System.out.println("Scraping info for " + option.getHotelName()); String[] dates = option.getDateRange().split(" - "); url += "&checkin=" + dates[0] + "&checkout=" + dates[1] + "&adult=" + option.getNumPeople(); driver.get(url); try { Thread.sleep(5000); } catch (InterruptedException e) { e.printStackTrace(); } //data fetch String pageSource = driver.getPageSource(); Document doc = Jsoup.parse(pageSource); Elements roomOptions = doc.select(".tblroom > tbody > tr"); for (Element roomOption : roomOptions) { String type = roomOption.select("a.tblroom-type").text(); String board = roomOption.select(".rezervacija-objekt").text(); if(board.length() > 2) { board = board.substring(0, 2); } if(board.isEmpty() || type.isEmpty()) continue; Elements amenityElement = roomOption.select(".objekt-opis"); String amenity = (amenityElement != null ? amenityElement.text() : ""); System.out.println(amenity + " " + board + " " + type ); String priceText = roomOption.select(".tbl-cena").text().replace("€", "").trim(); float price; if (!priceText.isEmpty()) { price = Float.parseFloat(priceText); }else continue; //Check for changes int odId = checkForChanges(option.getId(), type, board,amenity,price); if(odId != 0) { //true = changes found - update details DatabaseUtil.updateOptionDetails(odId,type,board,amenity,price); }else{ //false = not found / no changes - save regular DatabaseUtil.saveOptionDetails(option.getId(), type, board, amenity, price); } } } else if(url.contains("booking.escapetravel.mk")){ System.out.println("Scraping info for " + url); driver.get(url); try { Thread.sleep(5000); } catch (InterruptedException e) { e.printStackTrace(); } //data fetch String pageSource = driver.getPageSource(); Document doc = Jsoup.parse(pageSource); Elements roomOptions = doc.select("#hotel-rooms-container .hotel-room-row"); for(Element roomOption : roomOptions){ String type = roomOption.select("td.align-middle").first().text(); String board = roomOption.select("td.align-middle.text-primary.lead").text(); if (board.isEmpty() || type.isEmpty()) continue; String priceText = roomOption.select("td.align-middle.text-end .text-success.d-block.lead").text().replace("€", "").trim(); float price; if (!priceText.isEmpty()) { price = Float.parseFloat(priceText.replace(",", "")); } else continue; Elements amenityElements = doc.select("div.row > div.col-6.col-md-3.col-xl-2"); StringBuilder amenities = new StringBuilder(); for (Element amenityElement : amenityElements) { amenities.append(amenityElement.text()).append(", "); } if (!amenities.isEmpty()) { amenities.setLength(amenities.length() - 2); } System.out.println(type + board + price + amenities); int odId = checkForChanges(option.getId(), type, board,amenities.toString(),price); if(odId != 0) { //true = changes found - update details DatabaseUtil.updateOptionDetails(odId,type,board,amenities.toString(),price); }else{ //false = not found / no changes - save regular DatabaseUtil.saveOptionDetails(option.getId(), type, board, amenities.toString(), price); } } } } private int checkForChanges(int id, String type, String board, String amenities, float price){ //return true for changes, false for no changes try { List