- Timestamp:
- 01/10/25 19:07:51 (5 days ago)
- Branches:
- master
- Children:
- cd64b06
- Parents:
- 53bad7e
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
backend/GlobeGuru-backend/src/main/java/ScraperThread.java
r53bad7e r1c51912 39 39 } 40 40 41 private WebDriver driver; 41 public WebDriver driver; 42 42 43 43 private void initializeWebDriver() { … … 70 70 wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector("div.sodrzina"))); 71 71 break; 72 default: 73 System.out.println("URL not recognized for waiting condition."); 74 // Handle other URLs if needed 75 72 } 76 73 … … 92 89 Option existingOption = DatabaseUtil.findOption(option); 93 90 if (existingOption != null) { 94 if (existingOption.equals(option) || existingOption.getPrice() != option.getPrice()) { 91 if (existingOption.equals(option)) { 95 92 option.setPriceChanged(true); 96 93 option.setNewPrice(option.getPrice()); … … 112 109 if (parentDiv != null) { 113 110 childDivs = parentDiv.select("div.destinacija"); 114 System.out.println(childDivs.size()); 115 111 childDivs.removeIf(div -> div.attr("style").contains("display:none") || div.attr("style").contains("display: none")); 116 112 System.out.println("Filtered childDivs size: " + childDivs.size()); … … 119 115 Option newOption = optionParser(data,numPeople); 120 116 if (newOption != null) { 121 Option existingOption = DatabaseUtil.findOption(newOption); 122 if (existingOption != null) { 123 if (existingOption.equals(newOption) || existingOption.getPrice() != newOption.getPrice()) { 124 newOption.setPriceChanged(true); 125 newOption.setNewPrice(newOption.getPrice()); 126 } 127 DatabaseUtil.updateOptionInDatabase(newOption); 128 } else if (optionSet.add(newOption)) { 117 if (optionSet.add(newOption)) { 129 118 uniqueOptions.add(newOption); 130 DatabaseUtil.saveOptionToDatabase(newOption); 119 120 newOption.setId(DatabaseUtil.saveOptionToDatabase(newOption)); 121 scrapeOptionInfo(newOption); 131 122 System.out.println("Parsed " + newOption); 132 123 } … … 142 133 } 143 134 } 144 145 146 147 private Option optionParser(String data, int numPeople) { 135 private void scrapeOptionInfo(Option option) { 136 String url = option.getLink(); 137
if(url.contains("magelantravel.mk")) { 138 System.out.println("Scraping info for " + option.getHotelName()); 139 String[] dates = option.getDateRange().split(" - "); 140 url += "&checkin=" + dates[0] + "&checkout=" + dates[1] + "&adult=" + option.getNumPeople(); 141 142 driver.get(url); 143 try { Thread.sleep(5000); } catch (InterruptedException e) { e.printStackTrace(); } //data fetch 144 String pageSource = driver.getPageSource(); 145 Document doc = Jsoup.parse(pageSource); 146 Elements roomOptions = doc.select(".tblroom > tbody > tr"); 147 for (Element roomOption : roomOptions) { 148 String type = roomOption.select("a.tblroom-type").text(); 149 150 String board = roomOption.select(".rezervacija-objekt").text(); 151 if(board.length() > 2){ 152 board = board.substring(0,2); 153 } 154 if(board.isEmpty() || type.isEmpty()){ 155 continue; 156 } 157 Elements amenityElement = roomOption.select(".objekt-opis"); 158 String amenity = (amenityElement != null ? amenityElement.text() : ""); 159 System.out.println(amenity + " " + board + " " + type ); 160 String priceText = roomOption.select(".tbl-cena").text().replace("€", "").trim(); 161 float price; 162 if (!priceText.isEmpty()) { 163 price = Float.parseFloat(priceText); 164 }else continue; 165 166 DatabaseUtil.saveOptionDetails(option.getId(), type,board,amenity, price); 167 } 168 } 169 } 170 private Option optionParser(String data, int numPeople){ 148 171 Document doc = Jsoup.parse(data); 149 172 Option created = new Option(); … … 162 185 } 163 186 if (created.isEmpty()) { 164 System.out.println(created);165 187 return null; 166 188 } 189 //scrapeOptionInfo(created); 167 190 return created; 168 191 } … … 181 204 Element countryElement = doc.selectFirst("l.ponuda-lokacija"); 182 205 created.setCountry(countryElement != null ? 
countryElement.text() : null); 183 Element priceElement = doc.selectFirst("div.ponuda-cena"); 206 //Element priceElement = doc.selectFirst("div.ponuda-cena"); 184 207 Element dateElement = doc.selectFirst("l.ponuda-opis.termin"); 185 208 created.setDateRange(dateElement != null ? dateElement.text() : null); 186 float price = Float.parseFloat(priceElement != null ? priceElement.text().replaceAll("[^\\d.]", "") : "0"); 187 created.setPrice(price); 209 /*float price = Float.parseFloat(priceElement != null ? priceElement.text().replaceAll("[^\\d.]", "") : "0"); 210 created.setPrice(price);*/ 188 211 return created; 189 212 } … … 198 221 String country = countryP.text().replaceAll("leto hoteli", ""); 199 222 created.setCountry(country); 200 Element priceElem = doc.selectFirst("span.hotel-price"); 223 /*Element priceElem = doc.selectFirst("span.hotel-price"); 201 224 String priceText = priceElem.text(); 202 225 float price = 0; … … 204 227 price = Float.parseFloat(priceText.replace("€", "")); 205 228 } 206 created.setPrice(price); 229 created.setPrice(price);*/ 207 230 String[] queryParams = link.split("[?&]"); 208 231 String startDateStr = null;
Note: See TracChangeset for help on using the changeset viewer.