Changeset c164f8f for backend/GlobeGuru-backend/src
- Timestamp: 01/09/25 18:31:38 (6 days ago)
- Branches: master
- Children: 53bad7e
- Parents: d4d8f61
- Location: backend/GlobeGuru-backend/src/main
- Files: 6 added, 1 deleted, 3 edited, 1 moved
Legend:
- Unmodified
- Added
- Removed
backend/GlobeGuru-backend/src/main/java/Option.java
rd4d8f61 rc164f8f 1 import java.util.Objects; 2 1 3 public class Option { 4 private int id; 2 5 private String hotelName; 3 6 private String country; 4 private Stringprice;5 private String link; // Ensure to add a field for link7 private float price; 8 private String link; 6 9 private String imgSrc; 10 11 //Price changing 12 private float newPrice = 0; 13 private boolean isPriceChanged = false; 14 private String dateRange; 7 15 // Constructor 8 public Option(){} 16 public Option(){ 17 price = 0; 18 } 19 20 public void setDateRange(String dateRange) { 21 this.dateRange = dateRange; 22 } 23 24 public String getDateRange() { 25 return dateRange; 26 } 9 27 10 28 public boolean isEmpty(){ 11 return (hotelName == null || country == null || price == null|| link == null || imgSrc == null);29 return (hotelName == null || country == null || price == 0 || link == null || imgSrc == null); 12 30 } 13 // Getters and setters (ensure you have these methods)14 31 public String getHotelName() { 15 32 return hotelName; … … 28 45 } 29 46 30 public StringgetPrice() {47 public float getPrice() { 31 48 return price; 32 49 } 33 50 34 public void setPrice( Stringprice) {51 public void setPrice(float price) { 35 52 this.price = price; 36 53 } … … 52 69 } 53 70 54 // toString method (for debugging purposes) 71 @Override 72 public boolean equals(Object obj) { 73 if(this==obj) return true; 74 if(obj == null || getClass() != obj.getClass()) return false; 75 Option option = (Option) obj; 76 return Float.compare(option.price, price) == 0 77 && Objects.equals(hotelName, option.hotelName) 78 && Objects.equals(country, option.country) 79 && Objects.equals(link, option.link); 80 } 81 82 @Override 83 public int hashCode() { 84 return Objects.hash(hotelName,country,price,link); 85 } 86 87 public int getId() { 88 return id; 89 } 90 91 public void setId(int id) { 92 this.id = id; 93 } 94 95 //debug 55 96 @Override 56 97 public String toString() { 57 98 return "Option{" + 99 "id='" + id + '\'' + 100 
"dateRange='" + dateRange + '\'' + 58 101 "hotelName='" + hotelName + '\'' + 59 102 ", country='" + country + '\'' + 60 103 ", price='" + price + '\'' + 61 104 ", link='" + link + '\'' + 105 ", image='" + imgSrc + 62 106 '}'; 63 107 } 108 109 public void setPriceChanged(boolean a){ 110 isPriceChanged = a; 111 } 112 public void setNewPrice(float a){ 113 newPrice = a; 114 } 115 116 public boolean isPriceChanged() { 117 return isPriceChanged; 118 } 119 120 public float getNewPrice() { 121 return newPrice; 122 } 64 123 } -
backend/GlobeGuru-backend/src/main/java/Scraper.java
rd4d8f61 rc164f8f 7 7 import java.util.Iterator; 8 8 import java.util.List; 9 import java.util.concurrent.Callable; 9 10 import java.util.concurrent.ConcurrentLinkedQueue; 10 11 import java.util.concurrent.CountDownLatch; 11 12 12 public class Scraper extends Thread { 13 public class Scraper implements Callable<Void> { 14 13 15 private List<String> urls; 14 private String destination;15 private String departureDate;16 private int numberOfPeople;17 16 private ConcurrentLinkedQueue<Option> optionsQueue; 18 17 private CountDownLatch latch; 19 18 20 public Scraper( String destination, String departureDate, int numberOfPeople) {19 public Scraper() { 21 20 urls = new ArrayList<>(); 22 21 this.optionsQueue = new ConcurrentLinkedQueue<>(); 23 22 ObjectMapper mapper = new ObjectMapper(); 24 23 try { 25 JsonNode root = mapper.readTree(new File("src/main/java/URLsJSON.json")); 24 ClassLoader classLoader = getClass().getClassLoader(); 25 JsonNode root = mapper.readTree(new File(classLoader.getResource("URLsJSON.json").getFile())); 26 26 27 JsonNode urlNode = root.get("agencyurls"); 27 28 if (urlNode.isArray()) { … … 36 37 throw new RuntimeException(e); 37 38 } 38 this.destination = destination;39 this.departureDate = departureDate;40 this.numberOfPeople = numberOfPeople;41 39 this.latch = new CountDownLatch(urls.size()); 42 40 } 43 41 44 @Override 45 public void run() {42 43 public Void call() { 46 44 System.out.println("Scraper has started "); 47 45 for (String url : urls) { 48 new ScraperThread(url, destination, departureDate, numberOfPeople,optionsQueue, latch).start();46 new ScraperThread(url, optionsQueue, latch).start(); 49 47 } 50 } 51 public List<Option> getOptions() { 52 try { 53 latch.await(); // Wait for all threads to finish 54 } catch (InterruptedException e) { 55 e.printStackTrace(); 56 } 57 return new ArrayList<>(optionsQueue); 48 return null; 58 49 } 59 50 } -
backend/GlobeGuru-backend/src/main/java/ScraperThread.java
rd4d8f61 rc164f8f 1 import com.fasterxml.jackson.databind.JsonNode; 2 import com.fasterxml.jackson.databind.ObjectMapper; 3 import org.openqa.selenium.By; 1 4 import org.openqa.selenium.WebDriver; 5 import org.openqa.selenium.WebElement; 2 6 import org.openqa.selenium.chrome.ChromeDriver; 3 7 import org.openqa.selenium.chrome.ChromeOptions; … … 6 10 import org.jsoup.nodes.Element; 7 11 import org.jsoup.select.Elements; 8 12 import org.openqa.selenium.support.ui.ExpectedCondition; 13 import org.openqa.selenium.support.ui.ExpectedConditions; 14 import org.openqa.selenium.support.ui.WebDriverWait; 15 16 import java.io.File; 17 import java.io.IOException; 18 import java.sql.Connection; 19 import java.sql.DriverManager; 20 import java.sql.PreparedStatement; 21 import java.sql.SQLException; 22 import java.text.ParseException; 23 import java.text.SimpleDateFormat; 24 import java.util.*; 9 25 import java.util.concurrent.ConcurrentLinkedQueue; 10 26 import java.util.concurrent.CountDownLatch; … … 12 28 public class ScraperThread extends Thread { 13 29 private String url; 14 private String destination;15 private String departureDate;16 private int numberOfPeople;17 30 private ConcurrentLinkedQueue<Option> uniqueOptions; 18 31 private CountDownLatch latch; 19 20 public ScraperThread(String url, String destination, String departureDate, int numberOfPeople, ConcurrentLinkedQueue<Option> optionsQueue, CountDownLatch latch) { 32 private Set<Option> optionSet; 33 34 public ScraperThread(String url, ConcurrentLinkedQueue<Option> optionsQueue, CountDownLatch latch) { 21 35 this.url = url; 22 this.destination = destination;23 this.departureDate = departureDate;24 this.numberOfPeople = numberOfPeople;25 36 this.uniqueOptions = optionsQueue; 26 37 this.latch = latch; 38 this.optionSet = new HashSet<>(); 39 } 40 41 private WebDriver driver; 42 43 private void initializeWebDriver() { 44 System.setProperty("webdriver.chrome.driver", "C:\\chromedriver-win64\\chromedriver.exe"); 45 
ChromeOptions options = new ChromeOptions(); 46 options.setBinary("C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe"); 47 options.addArguments("--headless"); 48 options.addArguments("--disable-gpu"); 49 options.addArguments("--remote-allow-origins=*"); 50 options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"); 51 driver = new ChromeDriver(options); 52 } 53 54 private void closeWebDriver() { 55 if (driver != null) { 56 driver.quit(); 57 } 27 58 } 28 59 29 60 private void connectToWeb(String queryUrl) { 30 // Selenium 31 ChromeOptions options = new ChromeOptions(); 32 options.setBinary("C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe"); // Path to Brave, remove for Chrome compatibility 33 options.addArguments("--headless"); // Run in headless mode 34 options.addArguments("--disable-gpu"); 35 options.addArguments("--window-size=1920,1080"); 36 options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"); // User-Agent 37 38 // chromeDriver 39 System.setProperty("webdriver.chrome.driver", "C:\\drivers\\chromedriver.exe"); 40 System.setProperty("webdriver.http.factory", "jdk-http-client"); 41 WebDriver driver = new ChromeDriver(options); 42 try { 43 // Navigate to URL 44 driver.get(queryUrl); 45 Thread.sleep(10000); // Sleep to fetch all data 46 47 // Get page source 48 String pageSource = driver.getPageSource(); 49 System.out.println("Thread " + Thread.currentThread().getId() + " connected to " + queryUrl); 50 51 // Get only options 52 Document doc = Jsoup.parse(pageSource); 53 Element parentDiv; 54 Elements childDivs; 55 switch (url) { 56 case "https://www.fibula.com.mk/": 57 parentDiv = doc.selectFirst("div.flex.flex-col.gap-5"); 58 if (parentDiv != null) { 59 childDivs = parentDiv.select("div"); 60 for (Element div : childDivs) { 61 
String data = div.html(); 62 Option option = optionParser(data); 63 if (option != null) { 64 if (uniqueOptions.add(option)) { 65 System.out.println("Parsed Option: " + option); 61 driver.get(queryUrl); 62 63 WebDriverWait wait = new WebDriverWait(driver, 40); // 40s timeout buffer 64 switch (url) { 65 case "https://booking.escapetravel.mk/": 66 wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector("#hotels-container"))); 67 try { Thread.sleep(5000);} catch (InterruptedException e) { e.printStackTrace(); } 68 break; 69 case "https://magelantravel.mk/": 70 wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector("div.sodrzina"))); 71 break; 72 default: 73 System.out.println("URL not recognized for waiting condition."); 74 // Handle other URLs if needed 75 } 76 77 String pageSource = driver.getPageSource(); 78 System.out.println("Connected to " + queryUrl); 79 Document doc = Jsoup.parse(pageSource); 80 Element parentDiv; 81 Elements childDivs; 82 83 switch (url) { 84 case "https://www.fibula.com.mk/": 85 parentDiv = doc.selectFirst("div.flex.flex-col.gap-5"); 86 if (parentDiv != null) { 87 childDivs = parentDiv.select("div"); 88 for (Element div : childDivs) { 89 String data = div.html(); 90 Option option = optionParser(data); 91 if (option != null && optionSet.add(option)) { 92 uniqueOptions.add(option); 93 System.out.println("Parsed " + option); 94 } 95 } 96 } else { 97 System.out.println("Parent div not found"); 98 } 99 break; 100 case "https://booking.escapetravel.mk/": 101 parentDiv = doc.selectFirst("#hotels-container"); 102 if (parentDiv != null) { 103 childDivs = parentDiv.select("a.hotel-item"); 104 for (Element div : childDivs) { 105 String data = div.outerHtml(); 106 Option option = optionParser(data); 107 if (option != null) { 108 Option existingOption = DatabaseUtil.findOption(option); 109 if (existingOption != null) { 110 if (existingOption.equals(option) || existingOption.getPrice() != option.getPrice()) { 111 
option.setPriceChanged(true); 112 option.setNewPrice(option.getPrice()); 66 113 } 114 DatabaseUtil.updateOptionInDatabase(option); 115 } else if (optionSet.add(option)) { 116 uniqueOptions.add(option); 117 DatabaseUtil.saveOptionToDatabase(option); 118 System.out.println("Parsed " + option); 67 119 } 68 120 } 69 } else { 70 System.out.println("Parent div not found"); 71 } 72 break; 73 case "https://booking.escapetravel.mk/": 74 parentDiv = doc.selectFirst("div.container.pt-4.pt-md-6.scroll-into-view"); 75 Element subParent; 76 System.out.println(parentDiv); 77 if(parentDiv != null) { 78 subParent = parentDiv.selectFirst("div.row"); 79 }else{ 80 System.out.println("Parent div not found"); 81 break; 82 } 83 84 if (subParent != null) { 85 childDivs = subParent.select("div.col-md-3"); 86 87 for (Element div : childDivs) { 88 String data = div.html(); 89 Option option = optionParser(data); 90 if (option != null) { 91 if (uniqueOptions.add(option)) { 92 System.out.println("Parsed option: " + option); 121 } 122 } else { 123 System.out.println("Parent div not found"); 124 } 125 break; 126 case "https://magelantravel.mk/": 127 parentDiv = doc.selectFirst("div.sodrzina"); 128 if (parentDiv != null) { 129 childDivs = parentDiv.select("div.destinacija"); 130 System.out.println(childDivs.size()); 131 childDivs.removeIf(div -> div.attr("style").contains("display:none") || div.attr("style").contains("display: none")); 132 System.out.println("Filtered childDivs size: " + childDivs.size()); 133 for (Element div : childDivs) { 134 String data = div.outerHtml(); 135 Option newOption = optionParser(data); 136 if (newOption != null) { 137 Option existingOption = DatabaseUtil.findOption(newOption); 138 if (existingOption != null) { 139 if (existingOption.equals(newOption) || existingOption.getPrice() != newOption.getPrice()) { 140 newOption.setPriceChanged(true); 141 newOption.setNewPrice(newOption.getPrice()); 93 142 } 143 DatabaseUtil.updateOptionInDatabase(newOption); 144 } else if 
(optionSet.add(newOption)) { 145 uniqueOptions.add(newOption); 146 DatabaseUtil.saveOptionToDatabase(newOption); 147 System.out.println("Parsed " + newOption); 94 148 } 95 149 } 96 } else {97 System.out.println("subparent div not found"); 98 }99 break;100 }101 } catch (InterruptedException e) {102 e.printStackTrace();103 } finally {104 driver.quit();105 latch.countDown();106 } 107 } 150 } 151 152 } else { 153 System.out.println("Parent div not found"); 154 } 155 break; 156 default: 157 System.out.println("URL not recognized for parsing."); 158 } 159 } 160 161 108 162 109 163 private Option optionParser(String data) { 110 164 Document doc = Jsoup.parse(data); 111 165 Option created = new Option(); 112 113 166 switch (url) { 114 case "https:// www.fibula.com.mk/":115 created = parse Fibula(doc);167 case "https://magelantravel.mk/": 168 created = parseMagelan(doc); 116 169 break; 117 170 case "https://booking.escapetravel.mk/": … … 122 175 break; 123 176 } 124 125 177 if (created.isEmpty()) { 178 System.out.println(created); 126 179 return null; 127 180 } 128 129 181 return created; 130 182 } 131 183 132 private Option parse Fibula(Document doc) {184 private Option parseMagelan(Document doc) { 133 185 Option created = new Option(); 134 135 Element linkElement = doc.selectFirst("a[target='_blank']");136 created.setLink(linkElement != null ? url + linkElement.attr("href") : null);137 138 Element imgElement = doc.selectFirst("div. md\\:aspect-none img");139 created.setImgSrc(imgElement != null ? 
imgElement.attr("src") : null);140 141 Element hotelNameElement = doc.selectFirst(" h5.text-md");186 Element linkElement = doc.selectFirst("div.ponuda-sredina"); 187 int id = Integer.parseInt(linkElement.attr("data-id")); 188 int turop = Integer.parseInt(linkElement.attr("data-turop")); 189 created.setLink("https://magelantravel.mk/ponudi.php?type=1&objektid=" + id + "&turop=" + turop); 190 Element imgElement = doc.selectFirst("div.imgLiquidFill.imgLiquid.ponuda-img.zoom"); 191 created.setImgSrc(imgElement != null ? url + imgElement.attr("style") 192 .split("url\\(")[1].split("\\)")[0].replace("'", "").replace("./", "/") : null); 193 Element hotelNameElement = doc.selectFirst("div.ponuda-objekt"); 142 194 created.setHotelName(hotelNameElement != null ? hotelNameElement.text() : null); 143 144 Element countryElement = doc.selectFirst("small.text-navy"); 195 Element countryElement = doc.selectFirst("l.ponuda-lokacija"); 145 196 created.setCountry(countryElement != null ? countryElement.text() : null); 146 147 Element priceElement = doc.selectFirst("small.line-through"); 148 String price = priceElement != null ? priceElement.text().replaceAll("[^\\d.]", "") : "0"; 197 Element priceElement = doc.selectFirst("div.ponuda-cena"); 198 Element dateElement = doc.selectFirst("l.ponuda-opis.termin"); 199 created.setDateRange(dateElement != null ? dateElement.text() : null); 200 float price = Float.parseFloat(priceElement != null ? priceElement.text().replaceAll("[^\\d.]", "") : "0"); 149 201 created.setPrice(price); 150 151 202 return created; 152 203 } 153 154 204 private Option parseEscapeTravel(Document doc) { 155 205 Option created = new Option(); 156 157 // Extract link 158 Element linkElement = doc.selectFirst("a[target='_blank']"); 159 created.setLink(linkElement != null ? linkElement.attr("href") : null); 160 161 // Extract image source 162 Element imgElement = doc.selectFirst("img.card-img-top"); 163 created.setImgSrc(imgElement != null ? 
imgElement.attr("src") : null); 164 165 // Extract hotel name 166 Element hotelNameElement = doc.selectFirst("h3.fw-bold.text-body.mb-2"); 167 created.setHotelName(hotelNameElement != null ? hotelNameElement.text() : null); 168 169 // Extract country/location 170 Element countryElement = doc.selectFirst("h5.fw-light.text-primary.mb-1"); 171 created.setCountry(countryElement != null ? countryElement.text() : null); 172 173 // Extract price 174 Element priceElement = doc.selectFirst("h4.fw-light.text-success.mb-0"); 175 String price = priceElement != null ? priceElement.text().replaceAll("[^\\d.]", "") : "0"; 206 Element card = doc.selectFirst("a.hotel-item"); 207 String link = card.attr("href"); 208 created.setLink(link); 209 created.setImgSrc(card.attr("data-picture")); 210 created.setHotelName(card.attr("data-title")); 211 Element countryP = doc.selectFirst("p.text-info"); 212 created.setCountry(countryP != null ? countryP.text() : null); 213 Element priceElem = doc.selectFirst("span.hotel-price"); 214 String priceText = priceElem.text(); 215 float price = 0; 216 if(!priceText.isEmpty()) { 217 price = Float.parseFloat(priceText.replace("€", "")); 218 } 176 219 created.setPrice(price); 177 220 String[] queryParams = link.split("[?&]"); 221 String startDateStr = null; 222 int nights = 0; 223 for (String param : queryParams) { 224 if (param.startsWith("Date=")) { 225 startDateStr = param.split("=")[1]; 226 } 227 if (param.startsWith("Nights=")) { 228 nights = Integer.parseInt(param.split("=")[1]); 229 } 230 } 231 if (startDateStr != null && nights > 0) 232 { 233 SimpleDateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy"); 234 try { 235 Date startDate = dateFormat.parse(startDateStr); 236 237 Calendar calendar = Calendar.getInstance(); 238 calendar.setTime(startDate); 239 calendar.add(Calendar.DAY_OF_YEAR, nights); 240 Date endDate = calendar.getTime(); 241 String dateRange = dateFormat.format(startDate) + " - " + dateFormat.format(endDate); 242 
created.setDateRange(dateRange); 243 }catch (ParseException e){ 244 e.printStackTrace(); 245 } 246 } 178 247 return created; 179 248 } 180 249 181 182 250 @Override 183 public void run() { 184 System.out.println("Thread started for url: " + url); 185 StringBuilder builder = new StringBuilder(); 186 builder.append(url); 187 String queryUrl; 188 switch (url) { 189 case "https://www.fibula.com.mk/": 190 builder.append("search?productType=2&"); // search for hotels 191 for (int i = 0; i < numberOfPeople; i++) { // add all passengers (default adults) 192 builder.append("passengers=1993-01-01&"); 193 } 194 queryUrl = builder.toString(); 195 System.out.println(queryUrl); 196 connectToWeb(queryUrl); 197 break; 198 case "https://booking.escapetravel.mk/": 199 builder.append("destinations?Category=&Search=&DateFrom="); 200 builder.append(departureDate); 201 builder.append("&Rooms=1&Adults="); 202 builder.append(numberOfPeople); 203 queryUrl = builder.toString(); 204 System.out.println(queryUrl); 205 connectToWeb(queryUrl); 206 break; 207 default: 208 System.out.println("Not available for current url"); 209 latch.countDown(); 210 break; 211 } 212 } 213 } 251 public void run() { 252 System.out.println("Thread started for url: " + url); 253 initializeWebDriver(); 254 if ("https://magelantravel.mk/".equals(url)) { 255 ObjectMapper mapper = new ObjectMapper(); 256 try { 257 ClassLoader classLoader = getClass().getClassLoader(); 258 JsonNode root = mapper.readTree(new File(classLoader.getResource("CountriesList.json").getFile())); 259 JsonNode countries = root.get("countries"); 260 SimpleDateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy"); 261 Calendar calendar = Calendar.getInstance(); 262 calendar.add(Calendar.DAY_OF_YEAR, 1); 263 264 for (int i = 0; i < 90; i++) { // next three months 265 String date = dateFormat.format(calendar.getTime()); 266 for (JsonNode countryNode : countries) { 267 String country = countryNode.asText(); 268 for (int nokevanja = 2; nokevanja <= 10; 
nokevanja++) { 269 String queryUrl = url + "/destinacii?ah_tip=1&iframe=&affiliate_code=&carter_id=0&carter_region=&carter_dataod=&carter_datado=&destinacija=" + country + "&oddatum=" + date + "&nokevanja=" + nokevanja + "&dodatum=&broj_vozrasni=2&broj_deca=0&spdete1=0&spdete2=0&spdete3=0&spdete4=0"; 270 connectToWeb(queryUrl); 271 } 272 } 273 calendar.add(Calendar.DAY_OF_YEAR, 1); // next day 274 } 275 276 } catch (IOException e) { 277 e.printStackTrace(); 278 } 279 } else if ("https://booking.escapetravel.mk/".equals(url)) { 280 ObjectMapper mapper = new ObjectMapper(); 281 try { 282 ClassLoader classLoader = getClass().getClassLoader(); 283 JsonNode root = mapper.readTree(new File(classLoader.getResource("CountriesList.json").getFile())); 284 JsonNode countries = root.get("countries"); // Assuming "destinations" key in JSON 285 SimpleDateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy"); 286 Calendar calendar = Calendar.getInstance(); 287 calendar.add(Calendar.DAY_OF_YEAR, 1); 288 289 for (int i = 0; i < 90; i++) { // next three months 290 String date = dateFormat.format(calendar.getTime()); 291 for (JsonNode countryNode : countries) { 292 String country = countryNode.asText(); 293 for(int nokevanja = 2; nokevanja <=10; nokevanja ++) { 294 String queryUrl = url + "/hotels?Search=" + country + "&Date=" + date + "&Nights=" + nokevanja + "&Rooms=1&Adults=2"; 295 connectToWeb(queryUrl); 296 } 297 } 298 calendar.add(Calendar.DAY_OF_YEAR, 1); // next day 299 } 300 } catch (IOException e) { 301 e.printStackTrace(); 302 } 303 } else { 304 // Handle other URLs 305 } 306 closeWebDriver(); 307 latch.countDown(); 308 } 309 310 } -
backend/GlobeGuru-backend/src/main/resources/URLsJSON.json
rd4d8f61 rc164f8f 2 2 "agencyurls": 3 3 [ 4 "https:// www.fibula.com.mk/",4 "https://magelantravel.mk/", 5 5 "https://booking.escapetravel.mk/" 6 6 ]
Note: See TracChangeset for help on using the changeset viewer.