Changeset 47f4eaf for phonelux_scrappers/scrappers/a1_scrapper.py
- Timestamp: 11/20/22 16:34:52 (2 years ago)
- Branches: master
- Parents: ffd50db
- File: 1 edited
Legend (diff line markers):
- Unmodified
- Added
- Removed
phonelux_scrappers/scrappers/a1_scrapper.py
rffd50db r47f4eaf 1 import traceback 1 2 import unicodedata 2 3 from datetime import datetime … … 18 19 is_validated = False 19 20 20 # A1 phone offers that are already in database 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 21 30 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text)) 31 try: 32 # A1 phone offers that are already in database 33 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text)) 23 34 24 database_offers = []35 database_offers = [] 25 36 26 for offer in offers:27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],28 offer['ram_memory'],29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],31 offer['image_url'],32 offer['offer_url'], offer['last_updated'], offer['is_validated'],33 offer['offer_description'],34 offer['offer_shop_code'])35 database_offers.append(phoneOffer)37 for offer in offers: 38 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 39 offer['ram_memory'], 40 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 41 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 42 offer['image_url'], 43 offer['offer_url'], offer['last_updated'], offer['is_validated'], 44 offer['offer_description'], 45 offer['offer_shop_code']) 46 database_offers.append(phoneOffer) 36 47 37 a1_url = 'https://www.a1.mk/webshop/mk/phones'48 a1_url = 'https://www.a1.mk/webshop/mk/phones' 38 49 39 response1 = requests.get(a1_url)40 soup1 = 
BeautifulSoup(response1.content, 'html.parser')50 response1 = requests.get(a1_url) 51 soup1 = BeautifulSoup(response1.content, 'html.parser') 41 52 42 phones = soup1.find('main', {'class', 'gsm-advisor-grid phones'}).find('div', {'class', 'd-flex'}) \43 .find_all('div', {'class', 'dvc-idtfr by4'})53 phones = soup1.find('main', {'class', 'gsm-advisor-grid phones'}).find('div', {'class', 'd-flex'}) \ 54 .find_all('div', {'class', 'dvc-idtfr by4'}) 44 55 45 new_offers = []56 new_offers = [] 46 57 47 for phone in phones:48 brand = phone.get('data-brand').strip()49 offer_name = brand + " " + phone.get('data-model').strip()58 for phone in phones: 59 brand = phone.get('data-brand').strip() 60 offer_name = brand + " " + phone.get('data-model').strip() 50 61 51 # if brand not in offer_name:52 # offer_name = brand+" "+offer_name62 # if brand not in offer_name: 63 # offer_name = brand+" "+offer_name 53 64 54 offer_shop_code = phone.get('data-productid').strip()55 offer_url = phone.find('a', {'class', 'device-link'}).get('href')56 image_url = phone.get('data-image')65 offer_shop_code = phone.get('data-productid').strip() 66 offer_url = phone.find('a', {'class', 'device-link'}).get('href') 67 image_url = phone.get('data-image') 57 68 58 response2 = requests.get(offer_url)59 soup2 = BeautifulSoup(response2.content, 'html.parser')69 response2 = requests.get(offer_url) 70 soup2 = BeautifulSoup(response2.content, 'html.parser') 60 71 61 temp_prices = soup2.find('div', {'class': 'ured-tabs-content'}) \62 .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div')72 temp_prices = soup2.find('div', {'class': 'ured-tabs-content'}) \ 73 .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div') 63 74 64 # offer price65 price = None66 for temp_price in temp_prices:67 if 'Цена само за уред' in temp_price.get_text().strip():68 price = int(temp_price.get_text().replace('Цена само за уред', '')69 .replace('Одбери', 
'').replace('денари', '').replace('.', '').strip())75 # offer price 76 price = None 77 for temp_price in temp_prices: 78 if 'Цена само за уред' in temp_price.get_text().strip(): 79 price = int(temp_price.get_text().replace('Цена само за уред', '') 80 .replace('Одбери', '').replace('денари', '').replace('.', '').strip()) 70 81 71 colors_section = soup2.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label')82 colors_section = soup2.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label') 72 83 73 temp_colors = []74 for color_section in colors_section:75 temp_colors.append(color_section.get('data-content'))84 temp_colors = [] 85 for color_section in colors_section: 86 temp_colors.append(color_section.get('data-content')) 76 87 77 color = ','.join(temp_colors) # colors available for the offer88 color = ','.join(temp_colors) # colors available for the offer 78 89 79 phone_description = soup2.find('div', {'class': 'desc section'}).find('p').get_text().strip()90 phone_description = soup2.find('div', {'class': 'desc section'}).find('p').get_text().strip() 80 91 81 table_rows = soup2.find('table', {'class': 'table karakteristiki'}).find_all('tr')92 table_rows = soup2.find('table', {'class': 'table karakteristiki'}).find_all('tr') 82 93 83 back_camera = None84 operating_system = None85 cpu = None86 rom_memory = None87 ram_memory = None88 battery = None89 front_camera = None90 chipset = None91 offer_description = None94 back_camera = None 95 operating_system = None 96 cpu = None 97 rom_memory = None 98 ram_memory = None 99 battery = None 100 front_camera = None 101 chipset = None 102 offer_description = None 92 103 93 for row in table_rows:94 if 'Камера' in row.get_text().strip():95 back_camera = row.get_text().replace('Камера', '').strip()104 for row in table_rows: 105 if 'Камера' in row.get_text().strip(): 106 back_camera = row.get_text().replace('Камера', '').strip() 96 107 97 if 'Оперативен систем' in row.get_text().strip():98 
operating_system = row.get_text().replace('Оперативен систем', '').strip()108 if 'Оперативен систем' in row.get_text().strip(): 109 operating_system = row.get_text().replace('Оперативен систем', '').strip() 99 110 100 if 'CPU' in row.get_text().strip():101 cpu = row.get_text().replace('CPU', '').strip()111 if 'CPU' in row.get_text().strip(): 112 cpu = row.get_text().replace('CPU', '').strip() 102 113 103 if 'Вградена меморија' in row.get_text().strip():104 rom_memory = row.get_text().replace('Вградена меморија', '').strip()114 if 'Вградена меморија' in row.get_text().strip(): 115 rom_memory = row.get_text().replace('Вградена меморија', '').strip() 105 116 106 if 'RAM меморија' in row.get_text().strip():107 ram_memory = row.get_text().replace('RAM меморија', '').strip()117 if 'RAM меморија' in row.get_text().strip(): 118 ram_memory = row.get_text().replace('RAM меморија', '').strip() 108 119 109 if 'Батерија' in row.get_text().strip():110 battery = row.get_text().replace('Батерија', '').strip()120 if 'Батерија' in row.get_text().strip(): 121 battery = row.get_text().replace('Батерија', '').strip() 111 122 112 if 'Предна камера' in row.get_text().strip():113 front_camera = row.get_text().replace('Предна камера', '').strip()123 if 'Предна камера' in row.get_text().strip(): 124 front_camera = row.get_text().replace('Предна камера', '').strip() 114 125 115 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 116 color, front_camera, back_camera, chipset, battery, operating_system, cpu, image_url, 117 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 126 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 127 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 128 image_url, 129 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 118 130 119 for new_offer in new_offers: 120 flag = False 121 flag_price = False 122 offer_id = None 131 for 
new_offer in new_offers: 132 flag = False 133 flag_price = False 134 offer_id = None 135 136 for old_offer in database_offers: 137 138 if new_offer.offer_shop_code == old_offer.offer_shop_code: 139 flag = True 140 if new_offer.price != old_offer.price: 141 flag_price = True 142 offer_id = old_offer.offer_id 143 144 if flag: 145 # print('ALREADY IN DATABASE') 146 # print(new_offer) 147 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 148 if flag_price: 149 print('PRICE CHANGED!') # CHANGE PRICE 150 print('offer id: ' + str(offer_id)) 151 headers = {'Content-type': 'application/json'} 152 requests.put( 153 'http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 154 headers=headers) 155 else: 156 print('ADDED') # ADD OFFER 157 print(new_offer) 158 headers = {'Content-type': 'application/json'} 159 requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers, 160 data=json.dumps(new_offer.__dict__, 161 default=str)) 162 163 print('------------------------------------') 123 164 124 165 for old_offer in database_offers: 166 flag = False 167 for new_offer in new_offers: 168 if old_offer.offer_shop_code == new_offer.offer_shop_code: 169 flag = True 125 170 126 if new_offer.offer_shop_code == old_offer.offer_shop_code: 127 flag = True 128 if new_offer.price != old_offer.price: 129 flag_price = True 130 offer_id = old_offer.offer_id 131 132 if flag: 133 # print('ALREADY IN DATABASE') 134 # print(new_offer) 135 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 
136 if flag_price: 137 print('PRICE CHANGED!') # CHANGE PRICE 138 print('offer id: ' + str(offer_id)) 139 headers = {'Content-type': 'application/json'} 140 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 141 headers=headers) 142 else: 143 print('ADDED') # ADD OFFER 144 print(new_offer) 145 headers = {'Content-type': 'application/json'} 146 requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers, data=json.dumps(new_offer.__dict__, 147 default=str)) 148 149 print('------------------------------------') 150 151 for old_offer in database_offers: 152 flag = False 153 for new_offer in new_offers: 154 if old_offer.offer_shop_code == new_offer.offer_shop_code: 155 flag = True 156 157 if not flag: 158 print('OFFER DELETED') 159 print(old_offer) 160 # DELETE OFFER 161 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 171 if not flag: 172 print('OFFER DELETED') 173 print(old_offer) 174 # DELETE OFFER 175 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 176 except Exception: 177 traceback.print_exc() 178 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 179 ' VALUES (%s, %s, %s);' 180 insert_value = (offer_shop, last_updated, 'failed') 181 cur.execute(insert_script, insert_value) 182 db_connection.commit() 183 cur.close() 184 db_connection.close() 185 else: 186 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 187 ' VALUES (%s, %s, %s);' 188 insert_value = (offer_shop, last_updated, 'success') 189 cur.execute(insert_script, insert_value) 190 db_connection.commit() 191 cur.close() 192 db_connection.close()
Note: See TracChangeset for help on using the changeset viewer.