Changeset 47f4eaf for phonelux_scrappers/scrappers/setec_scrapper.py
- Timestamp:
- 11/20/22 16:34:52 (2 years ago)
- Branches:
- master
- Parents:
- ffd50db
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
phonelux_scrappers/scrappers/setec_scrapper.py
# Setec (setec.mk) phone-offer scrapper.
#
# Reads the offers the local phoneoffer API already holds for this shop,
# scrapes the shop's phone category pages, and pushes the differences back to
# the API (new offers, changed prices, offers that disappeared from the shop).
# The outcome of the run ('success' or 'failed') is recorded in the
# scrapper_info table.
import json
import traceback
import unicodedata
from datetime import datetime

# NOTE(review): lines 5-17 of the file are not shown in this changeset view.
# They presumably hold the remaining imports (requests, BeautifulSoup,
# psycopg2, config_read, PhoneOffer) and the definitions of offer_shop and
# last_updated used below -- confirm against the full file.

is_validated = False

# Header sent with every request that modifies an offer through the API.
JSON_HEADERS = {'Content-type': 'application/json'}

# Category listing pages that are scraped (both bounds inclusive).
FIRST_PAGE = 1
LAST_PAGE = 8

# Listings whose name contains one of these words are skipped.
SKIPPED_KEYWORDS = ('Cable', 'AirTag')

# Call to read the configuration file and connect to database
cinfo = config_read.get_databaseconfig("../postgresdb.config")
db_connection = psycopg2.connect(
    database=cinfo[0],
    host=cinfo[1],
    user=cinfo[2],
    password=cinfo[3]
)
cur = db_connection.cursor()

try:
    # Setec phone offers that are already in database
    offers = json.loads(unicodedata.normalize(
        'NFKD', requests.get('http://localhost:8080/phoneoffer/shop/setec').text))

    database_offers = [
        PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
                   offer['ram_memory'],
                   offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
                   offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
                   offer['image_url'],
                   offer['offer_url'], offer['last_updated'], offer['is_validated'],
                   offer['offer_description'],
                   offer['offer_shop_code'])
        for offer in offers
    ]

    # None of the specification fields is extracted by this scrapper, so every
    # scraped offer is created with them set to None.
    back_camera = None
    operating_system = None
    chipset = None
    battery = None
    ram_memory = None
    rom_memory = None
    cpu = None
    front_camera = None
    color = None

    new_offers = []

    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' + str(page)

        response1 = requests.get(setec_url)
        soup1 = BeautifulSoup(response1.content, 'html.parser')

        phones = soup1.find('div', {'id': 'mfilter-content-container'}) \
            .find_all('div', {'class': 'col-sm-4 col-xs-6'})

        for phone in phones:
            # Left column: link to the offer page and the product image.
            link = phone.find('div', {'class': 'left'}).find('a')
            offer_url = link.get('href')
            image_url = link.find('img').get('src')

            # Right column: name, shop code and price.
            details = phone.find('div', {'class': 'right'})
            offer_name = details.find('div', {'class': 'name'}).find('a').get_text().strip()

            if any(keyword in offer_name for keyword in SKIPPED_KEYWORDS):
                continue

            offer_shop_code = details.find('div', {'class': 'shifra'}) \
                .get_text().replace('Шифра:', '').strip()

            regular_price = details.find('div', {'class': 'price'}) \
                .find('div', {'class': 'category-price-redovna'})
            price_tag = regular_price.find('span', {'class': 'price-old-new'})
            if price_tag is None:
                # Fall back to the other price span when 'price-old-new' is absent.
                price_tag = regular_price.find('span', {'class': 'cena_za_kesh'})

            price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip())

            # The description is only available on the offer's own page.
            response2 = requests.get(offer_url)
            soup2 = BeautifulSoup(response2.content, 'html.parser')

            offer_description = soup2.find('div', {'id': 'tab-description'}).get_text(separator='\n')

            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
                                         color, front_camera, back_camera, chipset, battery,
                                         operating_system, cpu,
                                         image_url,
                                         offer_url, last_updated, is_validated, offer_description,
                                         offer_shop_code))

    # Index the stored offers by shop code so each scraped offer is matched
    # with one lookup instead of a scan over every stored offer.
    stored_by_code = {}
    for old_offer in database_offers:
        stored_by_code.setdefault(old_offer.offer_shop_code, []).append(old_offer)

    for new_offer in new_offers:
        matches = stored_by_code.get(new_offer.offer_shop_code, [])

        if not matches:
            print('ADDED')  # ADD OFFER
            print(new_offer)
            requests.post('http://localhost:8080/phoneoffer/addoffer',
                          headers=JSON_HEADERS, data=json.dumps(new_offer.__dict__, default=str))
            continue

        # Already in database: only the price is checked and, if it differs,
        # updated. When several stored offers share the code, the last one
        # with a differing price is the one that gets updated.
        repriced = [old_offer for old_offer in matches if new_offer.price != old_offer.price]
        if repriced:
            offer_id = repriced[-1].offer_id
            print('PRICE CHANGED!')  # CHANGE PRICE
            print('offer id: ' + str(offer_id))
            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
                         headers=JSON_HEADERS)

    print('------------------------------------')

    # Stored offers whose shop code was not scraped are deleted through the API.
    scraped_codes = {new_offer.offer_shop_code for new_offer in new_offers}
    for old_offer in database_offers:
        if old_offer.offer_shop_code not in scraped_codes:
            print('OFFER DELETED')
            print(old_offer)
            # DELETE OFFER
            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
except Exception:
    # Top-level boundary: report the failure and still record the run below.
    traceback.print_exc()
    status = 'failed'
else:
    status = 'success'

# Record the outcome of this run. The column name is spelled 'recieved_at' in
# the statement as written in this file.
insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
                ' VALUES (%s, %s, %s);'
try:
    cur.execute(insert_script, (offer_shop, last_updated, status))
    db_connection.commit()
finally:
    # Release the cursor and connection even if recording the status fails.
    cur.close()
    db_connection.close()
Note:
See TracChangeset
for help on using the changeset viewer.