Changeset 47f4eaf for phonelux_scrappers/scrappers/mobilezone_scrapper.py
Legend:
- Unmodified
- Added
- Removed
-
phonelux_scrappers/scrappers/mobilezone_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 18 19 is_validated = False 19 20 20 # Mobile Zone phone offers that are already in database 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 21 30 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobilezone').text)) 31 try: 32 # Mobile Zone phone offers that are already in database 33 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobilezone').text)) 23 34 24 database_offers = []35 database_offers = [] 25 36 26 for offer in offers:27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],28 offer['ram_memory'],29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],31 offer['image_url'],32 offer['offer_url'], offer['last_updated'], offer['is_validated'],33 offer['offer_description'],34 offer['offer_shop_code'])35 database_offers.append(phoneOffer)37 for offer in offers: 38 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 39 offer['ram_memory'], 40 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 41 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 42 offer['image_url'], 43 offer['offer_url'], offer['last_updated'], offer['is_validated'], 44 offer['offer_description'], 45 offer['offer_shop_code']) 46 database_offers.append(phoneOffer) 36 47 37 new_offers = []48 new_offers = [] 38 49 39 for i in range(1, 3):40 mobilezone_url = 
'https://mobilezone.mk/produkt-kategorija/telefoni/novi-telefoni/page/' + str(i) + '/'50 for i in range(1, 3): 51 mobilezone_url = 'https://mobilezone.mk/produkt-kategorija/telefoni/novi-telefoni/page/' + str(i) + '/' 41 52 42 response1 = requests.get(mobilezone_url)43 soup1 = BeautifulSoup(response1.content, 'html.parser')53 response1 = requests.get(mobilezone_url) 54 soup1 = BeautifulSoup(response1.content, 'html.parser') 44 55 45 phones = soup1.find('ul', {46 'class': 'products columns-tablet-2 columns-mobile-2 --skin-proto rey-wcGap-default rey-wcGrid-default '47 '--paginated columns-4'}).find_all('li')56 phones = soup1.find('ul', { 57 'class': 'products columns-tablet-2 columns-mobile-2 --skin-proto rey-wcGap-default rey-wcGrid-default ' 58 '--paginated columns-4'}).find_all('li') 48 59 49 for phone in phones:50 offer_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}).get(51 'href')52 image_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}) \53 .find('img').get('data-lazy-src')60 for phone in phones: 61 offer_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}).get( 62 'href') 63 image_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}) \ 64 .find('img').get('data-lazy-src') 54 65 55 brand_section = phone.find('div', {'class': 'rey-productInner'}).find('div', {'class': 'rey-brandLink'})66 brand_section = phone.find('div', {'class': 'rey-productInner'}).find('div', {'class': 'rey-brandLink'}) 56 67 57 if brand_section is not None: 58 brand = brand_section.find('a').get_text().strip() 68 if brand_section is not None: 69 brand = brand_section.find('a').get_text().strip() 70 else: 71 brand = None 72 73 offer_name = phone.find('h2', {'class': 'woocommerce-loop-product__title'}).find('a').get_text().strip() 74 75 if brand is not None and brand not in offer_name: 76 offer_name = brand + ' ' + 
offer_name 77 78 price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'}) 79 price = None 80 81 if price_tag is not None: 82 price = int(unicodedata.normalize('NFKD', price_tag.find('bdi').get_text() 83 .replace(',', '') 84 .replace('ден', '').strip())) 85 else: 86 continue 87 88 response2 = requests.get(offer_url) 89 soup2 = BeautifulSoup(response2.text, 'html.parser') 90 91 specifications = soup2.find('table', {'class': 'woocommerce-product-attributes shop_attributes'}).find_all('tr') 92 93 back_camera = None 94 front_camera = None 95 rom_memory = None 96 ram_memory = None 97 operating_system = None 98 cpu = None 99 chipset = None 100 offer_description = None 101 offer_shop_code = None 102 battery = None 103 color = None 104 105 for specification in specifications: 106 if 'Главна камера' in specification.find('th').get_text(): 107 back_camera = specification.find('td').get_text().strip() 108 109 if 'Селфи камера' in specification.find('th').get_text(): 110 front_camera = specification.find('td').get_text().strip() 111 112 if 'Батерија' in specification.find('th').get_text(): 113 battery = specification.find('td').get_text().strip() 114 115 if 'Меморија' in specification.find('th').get_text(): 116 rom_memory = specification.find('td').get_text().strip() 117 118 if 'Боја' in specification.find('th').get_text(): 119 color = specification.find('td').get_text().strip() 120 121 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 122 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 123 image_url, 124 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 125 126 for new_offer in new_offers: 127 flag = False 128 flag_price = False 129 offer_id = None 130 131 for old_offer in database_offers: 132 133 if new_offer.offer_name == old_offer.offer_name: 134 flag = True 135 if new_offer.price != old_offer.price: 136 flag_price = True 137 offer_id = old_offer.offer_id 138 139 if 
flag: 140 # print('ALREADY IN DATABASE') 141 # print(new_offer) 142 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 143 if flag_price: 144 print('PRICE CHANGED!') # CHANGE PRICE 145 print('offer id: ' + str(offer_id)) 146 headers = {'Content-type': 'application/json'} 147 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 148 headers=headers) 59 149 else: 60 brand = None 150 print('ADDED') # ADD OFFER 151 print(new_offer) 152 headers = {'Content-type': 'application/json'} 153 requests.post('http://localhost:8080/phoneoffer/addoffer', 154 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 61 155 62 offer_name = phone.find('h2', {'class': 'woocommerce-loop-product__title'}).find('a').get_text().strip() 63 64 if brand is not None and brand not in offer_name: 65 offer_name = brand + ' ' + offer_name 66 67 price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'}) 68 price = None 69 70 if price_tag is not None: 71 price = int(unicodedata.normalize('NFKD', price_tag.find('bdi').get_text() 72 .replace(',', '') 73 .replace('ден', '').strip())) 74 else: 75 continue 76 77 response2 = requests.get(offer_url) 78 soup2 = BeautifulSoup(response2.text, 'html.parser') 79 80 specifications = soup2.find('table', {'class': 'woocommerce-product-attributes shop_attributes'}).find_all('tr') 81 82 back_camera = None 83 front_camera = None 84 rom_memory = None 85 ram_memory = None 86 operating_system = None 87 cpu = None 88 chipset = None 89 offer_description = None 90 offer_shop_code = None 91 battery = None 92 color = None 93 94 for specification in specifications: 95 if 'Главна камера' in specification.find('th').get_text(): 96 back_camera = specification.find('td').get_text().strip() 97 98 if 'Селфи камера' in specification.find('th').get_text(): 99 front_camera = specification.find('td').get_text().strip() 100 101 if 'Батерија' in 
specification.find('th').get_text(): 102 battery = specification.find('td').get_text().strip() 103 104 if 'Меморија' in specification.find('th').get_text(): 105 rom_memory = specification.find('td').get_text().strip() 106 107 if 'Боја' in specification.find('th').get_text(): 108 color = specification.find('td').get_text().strip() 109 110 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 111 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 112 image_url, 113 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 114 115 for new_offer in new_offers: 116 flag = False 117 flag_price = False 118 offer_id = None 156 print('------------------------------------') 119 157 120 158 for old_offer in database_offers: 159 flag = False 160 for new_offer in new_offers: 161 if old_offer.offer_name == new_offer.offer_name: 162 flag = True 121 163 122 if new_offer.offer_name == old_offer.offer_name: 123 flag = True 124 if new_offer.price != old_offer.price: 125 flag_price = True 126 offer_id = old_offer.offer_id 127 128 if flag: 129 # print('ALREADY IN DATABASE') 130 # print(new_offer) 131 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 
132 if flag_price: 133 print('PRICE CHANGED!') # CHANGE PRICE 134 print('offer id: ' + str(offer_id)) 135 headers = {'Content-type': 'application/json'} 136 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 137 headers=headers) 138 else: 139 print('ADDED') # ADD OFFER 140 print(new_offer) 141 headers = {'Content-type': 'application/json'} 142 requests.post('http://localhost:8080/phoneoffer/addoffer', 143 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 144 145 print('------------------------------------') 146 147 for old_offer in database_offers: 148 flag = False 149 for new_offer in new_offers: 150 if old_offer.offer_name == new_offer.offer_name: 151 flag = True 152 153 if not flag: 154 print('OFFER DELETED') 155 print(old_offer) 156 # DELETE OFFER 157 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 164 if not flag: 165 print('OFFER DELETED') 166 print(old_offer) 167 # DELETE OFFER 168 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 169 except Exception: 170 traceback.print_exc() 171 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 172 ' VALUES (%s, %s, %s);' 173 insert_value = (offer_shop, last_updated, 'failed') 174 cur.execute(insert_script, insert_value) 175 db_connection.commit() 176 cur.close() 177 db_connection.close() 178 else: 179 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 180 ' VALUES (%s, %s, %s);' 181 insert_value = (offer_shop, last_updated, 'success') 182 cur.execute(insert_script, insert_value) 183 db_connection.commit() 184 cur.close() 185 db_connection.close()
Note: See TracChangeset for help on using the changeset viewer.