Changeset 47f4eaf for phonelux_scrappers/scrappers/akcija_scrapper.py
- Timestamp: 11/20/22 16:34:52 (2 years ago)
- Branches: master
- Parents: ffd50db
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
phonelux_scrappers/scrappers/akcija_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 from datetime import datetime 3 4 … … 18 19 is_validated = False 19 20 20 # Akcija phone offers that are already in database 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 21 30 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text)) 31 try: 32 # Akcija phone offers that are already in database 33 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text)) 23 34 24 database_offers = []35 database_offers = [] 25 36 26 for offer in offers:27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],28 offer['ram_memory'],29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],31 offer['image_url'],32 offer['offer_url'], offer['last_updated'], offer['is_validated'],33 offer['offer_description'],34 offer['offer_shop_code'])35 database_offers.append(phoneOffer)37 for offer in offers: 38 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 39 offer['ram_memory'], 40 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 41 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 42 offer['image_url'], 43 offer['offer_url'], offer['last_updated'], offer['is_validated'], 44 offer['offer_description'], 45 offer['offer_shop_code']) 46 database_offers.append(phoneOffer) 36 47 37 new_offers = []48 new_offers = [] 38 49 39 i = 040 while i <= 20:41 akcija_url = "https://akcija.com.mk/listing/" + str(i) + "?category=mobilnitelefoni"42 
response1 = requests.get(akcija_url)43 response1.encoding = 'utf-8'44 soup1 = BeautifulSoup(response1.text, 'html.parser')50 i = 0 51 while i <= 20: 52 akcija_url = "https://akcija.com.mk/listing/" + str(i) + "?category=mobilnitelefoni" 53 response1 = requests.get(akcija_url) 54 response1.encoding = 'utf-8' 55 soup1 = BeautifulSoup(response1.text, 'html.parser') 45 56 46 phones = soup1.find_all('div', {'class', 'product-item__body pb-xl-2'})57 phones = soup1.find_all('div', {'class', 'product-item__body pb-xl-2'}) 47 58 48 for phone in phones:49 offer_name = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a') \50 .get_text().replace('Паметен телефон', '').strip()51 brand = offer_name.split(' ')[0]59 for phone in phones: 60 offer_name = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a') \ 61 .get_text().replace('Паметен телефон', '').strip() 62 brand = offer_name.split(' ')[0] 52 63 53 if brand not in offer_name:54 offer_name = brand + " " + offer_name64 if brand not in offer_name: 65 offer_name = brand + " " + offer_name 55 66 56 offer_url = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a').get('href')57 image_url = phone.find('div', {'class', 'mb-2'}).find('img').get('src')58 price = int(phone.find('div', {'class', 'flex-center-between mb-1 pt-xl-2'}) \59 .find('ins').get_text().split(' ')[0].strip())67 offer_url = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a').get('href') 68 image_url = phone.find('div', {'class', 'mb-2'}).find('img').get('src') 69 price = int(phone.find('div', {'class', 'flex-center-between mb-1 pt-xl-2'}) \ 70 .find('ins').get_text().split(' ')[0].strip()) 60 71 61 response2 = requests.get(offer_url)62 response2.encoding = 'utf-8'63 soup2 = BeautifulSoup(response2.text, 'html.parser')72 response2 = requests.get(offer_url) 73 response2.encoding = 'utf-8' 74 soup2 = BeautifulSoup(response2.text, 'html.parser') 64 75 65 back_camera = None66 operating_system = None67 chipset = None68 
battery = None69 ram_memory = None70 rom_memory = None71 cpu = None72 front_camera = None73 color = None74 offer_shop_code = None76 back_camera = None 77 operating_system = None 78 chipset = None 79 battery = None 80 ram_memory = None 81 rom_memory = None 82 cpu = None 83 front_camera = None 84 color = None 85 offer_shop_code = None 75 86 76 specifications = soup2.find('main', {'id': 'content'}) \77 .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \78 .find('div', {'class', 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}).find_all('p')87 specifications = soup2.find('main', {'id': 'content'}) \ 88 .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \ 89 .find('div', {'class', 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}).find_all('p') 79 90 80 offer_description = ''81 for specification in specifications:82 if 'Код за нарачка' in str(specification.get_text(separator='\n').replace('NBSP', '').strip()):83 continue84 offer_description += unicodedata.normalize('NFKD',85 str(specification.get_text(separator='\n').strip())) + "\n"91 offer_description = '' 92 for specification in specifications: 93 if 'Код за нарачка' in str(specification.get_text(separator='\n').replace('NBSP', '').strip()): 94 continue 95 offer_description += unicodedata.normalize('NFKD', 96 str(specification.get_text(separator='\n').strip())) + "\n" 86 97 87 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,88 color, front_camera, back_camera, chipset, battery, operating_system, cpu,89 image_url,90 offer_url, last_updated, is_validated, offer_description, offer_shop_code))91 i += 2098 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 99 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 100 image_url, 101 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 102 i += 20 92 103 93 for new_offer in new_offers: 94 flag = False 95 flag_price = 
False 96 offer_id = None 104 for new_offer in new_offers: 105 flag = False 106 flag_price = False 107 offer_id = None 108 109 for old_offer in database_offers: 110 111 if new_offer.offer_name == old_offer.offer_name: 112 flag = True 113 if new_offer.price != old_offer.price: 114 flag_price = True 115 offer_id = old_offer.offer_id 116 117 if flag: 118 # print('ALREADY IN DATABASE') 119 # print(new_offer) 120 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 121 if flag_price: 122 print('PRICE CHANGED!') # CHANGE PRICE 123 print('offer id: ' + str(offer_id)) 124 headers = {'Content-type': 'application/json'} 125 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 126 headers=headers) 127 else: 128 print('ADDED') # ADD OFFER 129 print(new_offer) 130 headers = {'Content-type': 'application/json'} 131 requests.post('http://localhost:8080/phoneoffer/addoffer', 132 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 133 134 print('------------------------------------') 97 135 98 136 for old_offer in database_offers: 137 flag = False 138 for new_offer in new_offers: 139 if old_offer.offer_name == new_offer.offer_name: 140 flag = True 99 141 100 if new_offer.offer_name == old_offer.offer_name: 101 flag = True 102 if new_offer.price != old_offer.price: 103 flag_price = True 104 offer_id = old_offer.offer_id 142 if not flag: 143 print('OFFER DELETED') 144 print(old_offer) 145 # DELETE OFFER 146 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 147 except Exception: 148 traceback.print_exc() 149 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 150 ' VALUES (%s, %s, %s);' 151 insert_value = (offer_shop, last_updated, 'failed') 152 cur.execute(insert_script, insert_value) 153 db_connection.commit() 154 cur.close() 155 db_connection.close() 156 else: 157 insert_script = 'INSERT INTO scrapper_info (store, 
recieved_at, status)' \ 158 ' VALUES (%s, %s, %s);' 159 insert_value = (offer_shop, last_updated, 'success') 160 cur.execute(insert_script, insert_value) 161 db_connection.commit() 162 cur.close() 163 db_connection.close() 105 164 106 if flag:107 # print('ALREADY IN DATABASE')108 # print(new_offer)109 # if it's already in database, check PRICE and if it's changed, change it !!!!!!110 if flag_price:111 print('PRICE CHANGED!') # CHANGE PRICE112 print('offer id: ' + str(offer_id))113 headers = {'Content-type': 'application/json'}114 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),115 headers=headers)116 else:117 print('ADDED') # ADD OFFER118 print(new_offer)119 headers = {'Content-type': 'application/json'}120 requests.post('http://localhost:8080/phoneoffer/addoffer',121 headers=headers, data=json.dumps(new_offer.__dict__, default=str))122 123 print('------------------------------------')124 125 for old_offer in database_offers:126 flag = False127 for new_offer in new_offers:128 if old_offer.offer_name == new_offer.offer_name:129 flag = True130 131 if not flag:132 print('OFFER DELETED')133 print(old_offer)134 # DELETE OFFER135 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
Note: See TracChangeset for help on using the changeset viewer.