Changeset 47f4eaf for phonelux_scrappers/scrappers/mobitech_scrapper.py
- Timestamp: 11/20/22 16:34:52 (2 years ago)
- Branches: master
- Parents: ffd50db
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
phonelux_scrappers/scrappers/mobitech_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 14 15 sys.stdout = open(file_path, "w") 15 16 16 17 mobitech_url = "https://mobitech.mk/shop/"18 19 response1 = requests.get(mobitech_url)20 21 soup1 = BeautifulSoup(response1.content, 'html.parser')22 23 phones = soup1.find_all('div', {'class': 'jet-woo-products__inner-box'})24 25 17 offer_shop = "Mobitech" # offer shop 26 18 last_updated = datetime.now().date() 27 19 is_validated = False 28 20 29 # Mobitech phone offers that are already in database 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 30 30 31 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text)) 31 try: 32 mobitech_url = "https://mobitech.mk/shop/" 32 33 33 database_offers = [] 34 response1 = requests.get(mobitech_url) 34 35 35 for offer in offers: 36 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 37 offer['ram_memory'], 38 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 39 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 40 offer['image_url'], 41 offer['offer_url'], offer['last_updated'], offer['is_validated'], 42 offer['offer_description'], 43 offer['offer_shop_code']) 44 database_offers.append(phoneOffer) 36 soup1 = BeautifulSoup(response1.content, 'html.parser') 45 37 46 new_offers = [] 38 phones = soup1.find_all('div', {'class': 'jet-woo-products__inner-box'}) 47 39 48 for phone in phones: 49 offer_url = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get('href') # url 50 image_url = phone.find('div', {'class': 'jet-woo-product-thumbnail'}).find('img').get('src') # 
image 51 brand = phone.find_next('div', {'class': 'jet-woo-product-categories'}).find('a').get_text().strip() # brand 52 offer_name = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get_text().strip() # offer_name 53 if brand not in offer_name: 54 offer_name = brand+" "+offer_name 55 temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi') 56 price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price 40 # Mobitech phone offers that are already in database 41 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text)) 57 42 58 response2 = requests.get(offer_url) 59 soup2 = BeautifulSoup(response2.content, 'html.parser') 43 database_offers = [] 60 44 61 specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'}) 45 for offer in offers: 46 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 47 offer['ram_memory'], 48 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 49 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 50 offer['image_url'], 51 offer['offer_url'], offer['last_updated'], offer['is_validated'], 52 offer['offer_description'], 53 offer['offer_shop_code']) 54 database_offers.append(phoneOffer) 62 55 63 ram_memory = None 64 rom_memory = None 65 battery = None 66 back_camera = None 67 front_camera = None 68 operating_system = None 69 chipset = None 70 color = None 71 offer_shop_code = None 72 cpu = None 73 offer_description = None 56 new_offers = [] 74 57 75 for specification in specifications: 76 # rom memory 77 if specification.get_text().startswith("Меморија:"): 78 rom_memory = specification.get_text().split("Меморија:")[1].strip() 79 if rom_memory == "Нема" or rom_memory == "/": 80 rom_memory = None 58 for phone in phones: 59 offer_url = phone.find('h5', 
{'class': 'jet-woo-product-title'}).find('a').get('href') # url 60 image_url = phone.find('div', {'class': 'jet-woo-product-thumbnail'}).find('img').get('src') # image 61 brand = phone.find_next('div', {'class': 'jet-woo-product-categories'}).find('a').get_text().strip() # brand 62 offer_name = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get_text().strip() # offer_name 63 if brand not in offer_name: 64 offer_name = brand+" "+offer_name 65 temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi') 66 price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price 81 67 82 # ram memory 83 if specification.get_text().startswith("РАМ Меморија:"): 84 ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\ 85 .replace('Ram', '').strip() 86 if ram_memory == "Нема" or ram_memory == "/": 87 ram_memory = None 68 response2 = requests.get(offer_url) 69 soup2 = BeautifulSoup(response2.content, 'html.parser') 88 70 89 # camera 90 if specification.get_text().startswith("Камера:"): 91 back_camera = specification.get_text().split("Камера:")[1].strip() 92 if back_camera == "Нема": 93 back_camera = None 71 specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'}) 94 72 95 # operating system 96 if specification.get_text().startswith("Оперативен систем:"): 97 operating_system = specification.get_text().split("Оперативен систем:")[1].split(",")[0].strip() 98 if operating_system == "Нема": 99 operating_system = None 73 ram_memory = None 74 rom_memory = None 75 battery = None 76 back_camera = None 77 front_camera = None 78 operating_system = None 79 chipset = None 80 color = None 81 offer_shop_code = None 82 cpu = None 83 offer_description = None 100 84 101 # battery 102 if specification.get_text().startswith("Батерија:"): 103 battery = specification.get_text().split("Батерија:")[1].strip() 104 if battery == "Нема": 105 
battery = None 85 for specification in specifications: 86 # rom memory 87 if specification.get_text().startswith("Меморија:"): 88 rom_memory = specification.get_text().split("Меморија:")[1].strip() 89 if rom_memory == "Нема" or rom_memory == "/": 90 rom_memory = None 106 91 107 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 108 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 109 image_url, 110 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 92 # ram memory 93 if specification.get_text().startswith("РАМ Меморија:"): 94 ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\ 95 .replace('Ram', '').strip() 96 if ram_memory == "Нема" or ram_memory == "/": 97 ram_memory = None 111 98 112 for new_offer in new_offers: 113 flag = False 114 flag_price = False 115 offer_id = None 99 # camera 100 if specification.get_text().startswith("Камера:"): 101 back_camera = specification.get_text().split("Камера:")[1].strip() 102 if back_camera == "Нема": 103 back_camera = None 104 105 # operating system 106 if specification.get_text().startswith("Оперативен систем:"): 107 operating_system = specification.get_text().split("Оперативен систем:")[1].split(",")[0].strip() 108 if operating_system == "Нема": 109 operating_system = None 110 111 # battery 112 if specification.get_text().startswith("Батерија:"): 113 battery = specification.get_text().split("Батерија:")[1].strip() 114 if battery == "Нема": 115 battery = None 116 117 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 118 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 119 image_url, 120 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 121 122 for new_offer in new_offers: 123 flag = False 124 flag_price = False 125 offer_id = None 126 127 for old_offer in database_offers: 128 129 if new_offer.offer_name == old_offer.offer_name: 
130 flag = True 131 if new_offer.price != old_offer.price: 132 flag_price = True 133 offer_id = old_offer.offer_id 134 135 if flag: 136 print('ALREADY IN DATABASE') 137 print(new_offer) 138 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 139 if flag_price: 140 print('PRICE CHANGED!') # CHANGE PRICE 141 print('offer id: ' + str(offer_id)) 142 headers = {'Content-type': 'application/json'} 143 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 144 headers=headers) 145 else: 146 print('ADDED') # ADD OFFER 147 print(new_offer) 148 headers = {'Content-type': 'application/json'} 149 requests.post('http://localhost:8080/phoneoffer/addoffer', 150 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 151 152 print('------------------------------------') 116 153 117 154 for old_offer in database_offers: 155 flag = False 156 for new_offer in new_offers: 157 if old_offer.offer_name == new_offer.offer_name: 158 flag = True 118 159 119 if new_offer.offer_name == old_offer.offer_name: 120 flag = True 121 if new_offer.price != old_offer.price: 122 flag_price = True 123 offer_id = old_offer.offer_id 160 if not flag: 161 print('OFFER DELETED') 162 print(old_offer) 163 # DELETE OFFER 164 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 165 except Exception: 166 traceback.print_exc() 167 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 168 ' VALUES (%s, %s, %s);' 169 insert_value = (offer_shop, last_updated, 'failed') 170 cur.execute(insert_script, insert_value) 171 db_connection.commit() 172 cur.close() 173 db_connection.close() 174 else: 175 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 176 ' VALUES (%s, %s, %s);' 177 insert_value = (offer_shop, last_updated, 'success') 178 cur.execute(insert_script, insert_value) 179 db_connection.commit() 180 cur.close() 181 db_connection.close() 124 
182 125 if flag:126 print('ALREADY IN DATABASE')127 print(new_offer)128 # if it's already in database, check PRICE and if it's changed, change it !!!!!!129 if flag_price:130 print('PRICE CHANGED!') # CHANGE PRICE131 print('offer id: ' + str(offer_id))132 headers = {'Content-type': 'application/json'}133 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),134 headers=headers)135 else:136 print('ADDED') # ADD OFFER137 print(new_offer)138 headers = {'Content-type': 'application/json'}139 requests.post('http://localhost:8080/phoneoffer/addoffer',140 headers=headers, data=json.dumps(new_offer.__dict__, default=str))141 142 print('------------------------------------')143 144 for old_offer in database_offers:145 flag = False146 for new_offer in new_offers:147 if old_offer.offer_name == new_offer.offer_name:148 flag = True149 150 if not flag:151 print('OFFER DELETED')152 print(old_offer)153 # DELETE OFFER154 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))155
Note: See TracChangeset for help on using the changeset viewer.