Changeset 47f4eaf for phonelux_scrappers/scrappers
- Timestamp:
- 11/20/22 16:34:52 (2 years ago)
- Branches:
- master
- Parents:
- ffd50db
- Location:
- phonelux_scrappers/scrappers
- Files:
-
- 12 edited
Legend:
- Unmodified
- Added
- Removed
-
phonelux_scrappers/scrappers/a1_scrapper.py
rffd50db r47f4eaf 1 import traceback 1 2 import unicodedata 2 3 from datetime import datetime … … 18 19 is_validated = False 19 20 20 # A1 phone offers that are already in database 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 21 30 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text)) 31 try: 32 # A1 phone offers that are already in database 33 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text)) 23 34 24 database_offers = []35 database_offers = [] 25 36 26 for offer in offers:27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],28 offer['ram_memory'],29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],31 offer['image_url'],32 offer['offer_url'], offer['last_updated'], offer['is_validated'],33 offer['offer_description'],34 offer['offer_shop_code'])35 database_offers.append(phoneOffer)37 for offer in offers: 38 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 39 offer['ram_memory'], 40 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 41 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 42 offer['image_url'], 43 offer['offer_url'], offer['last_updated'], offer['is_validated'], 44 offer['offer_description'], 45 offer['offer_shop_code']) 46 database_offers.append(phoneOffer) 36 47 37 a1_url = 'https://www.a1.mk/webshop/mk/phones'48 a1_url = 'https://www.a1.mk/webshop/mk/phones' 38 49 39 response1 = requests.get(a1_url)40 soup1 = BeautifulSoup(response1.content, 'html.parser')50 response1 = requests.get(a1_url) 51 soup1 = BeautifulSoup(response1.content, 'html.parser') 41 52 42 phones = soup1.find('main', {'class', 'gsm-advisor-grid phones'}).find('div', {'class', 'd-flex'}) \43 .find_all('div', {'class', 'dvc-idtfr by4'})53 phones = soup1.find('main', {'class', 'gsm-advisor-grid phones'}).find('div', {'class', 'd-flex'}) \ 54 .find_all('div', {'class', 'dvc-idtfr by4'}) 44 55 45 new_offers = []56 new_offers = [] 46 57 47 for phone in phones:48 brand = phone.get('data-brand').strip()49 offer_name = brand + " " + phone.get('data-model').strip()58 for phone in phones: 59 brand = phone.get('data-brand').strip() 60 offer_name = brand + " " + phone.get('data-model').strip() 50 61 51 # if brand not in offer_name:52 # offer_name = brand+" "+offer_name62 # if brand not in offer_name: 63 # offer_name = brand+" "+offer_name 53 64 54 offer_shop_code = phone.get('data-productid').strip()55 offer_url = phone.find('a', {'class', 'device-link'}).get('href')56 image_url = phone.get('data-image')65 offer_shop_code = phone.get('data-productid').strip() 66 offer_url = phone.find('a', {'class', 'device-link'}).get('href') 67 image_url = phone.get('data-image') 57 68 58 response2 = requests.get(offer_url)59 soup2 = BeautifulSoup(response2.content, 'html.parser')69 response2 = requests.get(offer_url) 70 soup2 = BeautifulSoup(response2.content, 'html.parser') 60 71 61 temp_prices = soup2.find('div', {'class': 'ured-tabs-content'}) \62 .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div')72 temp_prices = soup2.find('div', {'class': 'ured-tabs-content'}) \ 73 .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div') 63 74 64 # offer price65 price = None66 for temp_price in temp_prices:67 if 'Цена само за уред' in temp_price.get_text().strip():68 price = int(temp_price.get_text().replace('Цена само за уред', '')69 .replace('Одбери', '').replace('денари', '').replace('.', '').strip())75 # offer price 76 price = None 77 for temp_price in temp_prices: 78 if 'Цена само за уред' in temp_price.get_text().strip(): 79 price = int(temp_price.get_text().replace('Цена само за уред', '') 80 .replace('Одбери', '').replace('денари', '').replace('.', '').strip()) 70 81 71 colors_section = soup2.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label')82 colors_section = soup2.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label') 72 83 73 temp_colors = []74 for color_section in colors_section:75 temp_colors.append(color_section.get('data-content'))84 temp_colors = [] 85 for color_section in colors_section: 86 temp_colors.append(color_section.get('data-content')) 76 87 77 color = ','.join(temp_colors) # colors available for the offer88 color = ','.join(temp_colors) # colors available for the offer 78 89 79 phone_description = soup2.find('div', {'class': 'desc section'}).find('p').get_text().strip()90 phone_description = soup2.find('div', {'class': 'desc section'}).find('p').get_text().strip() 80 91 81 table_rows = soup2.find('table', {'class': 'table karakteristiki'}).find_all('tr')92 table_rows = soup2.find('table', {'class': 'table karakteristiki'}).find_all('tr') 82 93 83 back_camera = None84 operating_system = None85 cpu = None86 rom_memory = None87 ram_memory = None88 battery = None89 front_camera = None90 chipset = None91 offer_description = None94 back_camera = None 95 operating_system = None 96 cpu = None 97 rom_memory = None 98 ram_memory = None 99 battery = None 100 front_camera = None 101 chipset = None 102 offer_description = None 92 103 93 for row in table_rows:94 if 'Камера' in row.get_text().strip():95 back_camera = row.get_text().replace('Камера', '').strip()104 for row in table_rows: 105 if 'Камера' in row.get_text().strip(): 106 back_camera = row.get_text().replace('Камера', '').strip() 96 107 97 if 'Оперативен систем' in row.get_text().strip():98 operating_system = row.get_text().replace('Оперативен систем', '').strip()108 if 'Оперативен систем' in row.get_text().strip(): 109 operating_system = row.get_text().replace('Оперативен систем', '').strip() 99 110 100 if 'CPU' in row.get_text().strip():101 cpu = row.get_text().replace('CPU', '').strip()111 if 'CPU' in row.get_text().strip(): 112 cpu = row.get_text().replace('CPU', '').strip() 102 113 103 if 'Вградена меморија' in row.get_text().strip():104 rom_memory = row.get_text().replace('Вградена меморија', '').strip()114 if 'Вградена меморија' in row.get_text().strip(): 115 rom_memory = row.get_text().replace('Вградена меморија', '').strip() 105 116 106 if 'RAM меморија' in row.get_text().strip():107 ram_memory = row.get_text().replace('RAM меморија', '').strip()117 if 'RAM меморија' in row.get_text().strip(): 118 ram_memory = row.get_text().replace('RAM меморија', '').strip() 108 119 109 if 'Батерија' in row.get_text().strip():110 battery = row.get_text().replace('Батерија', '').strip()120 if 'Батерија' in row.get_text().strip(): 121 battery = row.get_text().replace('Батерија', '').strip() 111 122 112 if 'Предна камера' in row.get_text().strip():113 front_camera = row.get_text().replace('Предна камера', '').strip()123 if 'Предна камера' in row.get_text().strip(): 124 front_camera = row.get_text().replace('Предна камера', '').strip() 114 125 115 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 116 color, front_camera, back_camera, chipset, battery, operating_system, cpu, image_url, 117 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 126 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 127 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 128 image_url, 129 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 118 130 119 for new_offer in new_offers: 120 flag = False 121 flag_price = False 122 offer_id = None 131 for new_offer in new_offers: 132 flag = False 133 flag_price = False 134 offer_id = None 135 136 for old_offer in database_offers: 137 138 if new_offer.offer_shop_code == old_offer.offer_shop_code: 139 flag = True 140 if new_offer.price != old_offer.price: 141 flag_price = True 142 offer_id = old_offer.offer_id 143 144 if flag: 145 # print('ALREADY IN DATABASE') 146 # print(new_offer) 147 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 148 if flag_price: 149 print('PRICE CHANGED!') # CHANGE PRICE 150 print('offer id: ' + str(offer_id)) 151 headers = {'Content-type': 'application/json'} 152 requests.put( 153 'http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 154 headers=headers) 155 else: 156 print('ADDED') # ADD OFFER 157 print(new_offer) 158 headers = {'Content-type': 'application/json'} 159 requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers, 160 data=json.dumps(new_offer.__dict__, 161 default=str)) 162 163 print('------------------------------------') 123 164 124 165 for old_offer in database_offers: 166 flag = False 167 for new_offer in new_offers: 168 if old_offer.offer_shop_code == new_offer.offer_shop_code: 169 flag = True 125 170 126 if new_offer.offer_shop_code == old_offer.offer_shop_code: 127 flag = True 128 if new_offer.price != old_offer.price: 129 flag_price = True 130 offer_id = old_offer.offer_id 131 132 if flag: 133 # print('ALREADY IN DATABASE') 134 # print(new_offer) 135 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 136 if flag_price: 137 print('PRICE CHANGED!') # CHANGE PRICE 138 print('offer id: ' + str(offer_id)) 139 headers = {'Content-type': 'application/json'} 140 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 141 headers=headers) 142 else: 143 print('ADDED') # ADD OFFER 144 print(new_offer) 145 headers = {'Content-type': 'application/json'} 146 requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers, data=json.dumps(new_offer.__dict__, 147 default=str)) 148 149 print('------------------------------------') 150 151 for old_offer in database_offers: 152 flag = False 153 for new_offer in new_offers: 154 if old_offer.offer_shop_code == new_offer.offer_shop_code: 155 flag = True 156 157 if not flag: 158 print('OFFER DELETED') 159 print(old_offer) 160 # DELETE OFFER 161 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 171 if not flag: 172 print('OFFER DELETED') 173 print(old_offer) 174 # DELETE OFFER 175 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 176 except Exception: 177 traceback.print_exc() 178 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 179 ' VALUES (%s, %s, %s);' 180 insert_value = (offer_shop, last_updated, 'failed') 181 cur.execute(insert_script, insert_value) 182 db_connection.commit() 183 cur.close() 184 db_connection.close() 185 else: 186 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 187 ' VALUES (%s, %s, %s);' 188 insert_value = (offer_shop, last_updated, 'success') 189 cur.execute(insert_script, insert_value) 190 db_connection.commit() 191 cur.close() 192 db_connection.close() -
phonelux_scrappers/scrappers/akcija_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 from datetime import datetime 3 4 … … 18 19 is_validated = False 19 20 20 # Akcija phone offers that are already in database 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 21 30 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text)) 31 try: 32 # Akcija phone offers that are already in database 33 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text)) 23 34 24 database_offers = []35 database_offers = [] 25 36 26 for offer in offers:27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],28 offer['ram_memory'],29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],31 offer['image_url'],32 offer['offer_url'], offer['last_updated'], offer['is_validated'],33 offer['offer_description'],34 offer['offer_shop_code'])35 database_offers.append(phoneOffer)37 for offer in offers: 38 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 39 offer['ram_memory'], 40 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 41 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 42 offer['image_url'], 43 offer['offer_url'], offer['last_updated'], offer['is_validated'], 44 offer['offer_description'], 45 offer['offer_shop_code']) 46 database_offers.append(phoneOffer) 36 47 37 new_offers = []48 new_offers = [] 38 49 39 i = 040 while i <= 20:41 akcija_url = "https://akcija.com.mk/listing/" + str(i) + "?category=mobilnitelefoni"42 response1 = requests.get(akcija_url)43 response1.encoding = 'utf-8'44 soup1 = BeautifulSoup(response1.text, 'html.parser')50 i = 0 51 while i <= 20: 52 akcija_url = "https://akcija.com.mk/listing/" + str(i) + "?category=mobilnitelefoni" 53 response1 = requests.get(akcija_url) 54 response1.encoding = 'utf-8' 55 soup1 = BeautifulSoup(response1.text, 'html.parser') 45 56 46 phones = soup1.find_all('div', {'class', 'product-item__body pb-xl-2'})57 phones = soup1.find_all('div', {'class', 'product-item__body pb-xl-2'}) 47 58 48 for phone in phones:49 offer_name = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a') \50 .get_text().replace('Паметен телефон', '').strip()51 brand = offer_name.split(' ')[0]59 for phone in phones: 60 offer_name = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a') \ 61 .get_text().replace('Паметен телефон', '').strip() 62 brand = offer_name.split(' ')[0] 52 63 53 if brand not in offer_name:54 offer_name = brand + " " + offer_name64 if brand not in offer_name: 65 offer_name = brand + " " + offer_name 55 66 56 offer_url = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a').get('href')57 image_url = phone.find('div', {'class', 'mb-2'}).find('img').get('src')58 price = int(phone.find('div', {'class', 'flex-center-between mb-1 pt-xl-2'}) \59 .find('ins').get_text().split(' ')[0].strip())67 offer_url = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a').get('href') 68 image_url = phone.find('div', {'class', 'mb-2'}).find('img').get('src') 69 price = int(phone.find('div', {'class', 'flex-center-between mb-1 pt-xl-2'}) \ 70 .find('ins').get_text().split(' ')[0].strip()) 60 71 61 response2 = requests.get(offer_url)62 response2.encoding = 'utf-8'63 soup2 = BeautifulSoup(response2.text, 'html.parser')72 response2 = requests.get(offer_url) 73 response2.encoding = 'utf-8' 74 soup2 = BeautifulSoup(response2.text, 'html.parser') 64 75 65 back_camera = None66 operating_system = None67 chipset = None68 battery = None69 ram_memory = None70 rom_memory = None71 cpu = None72 front_camera = None73 color = None74 offer_shop_code = None76 back_camera = None 77 operating_system = None 78 chipset = None 79 battery = None 80 ram_memory = None 81 rom_memory = None 82 cpu = None 83 front_camera = None 84 color = None 85 offer_shop_code = None 75 86 76 specifications = soup2.find('main', {'id': 'content'}) \77 .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \78 .find('div', {'class', 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}).find_all('p')87 specifications = soup2.find('main', {'id': 'content'}) \ 88 .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \ 89 .find('div', {'class', 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}).find_all('p') 79 90 80 offer_description = ''81 for specification in specifications:82 if 'Код за нарачка' in str(specification.get_text(separator='\n').replace('NBSP', '').strip()):83 continue84 offer_description += unicodedata.normalize('NFKD',85 str(specification.get_text(separator='\n').strip())) + "\n"91 offer_description = '' 92 for specification in specifications: 93 if 'Код за нарачка' in str(specification.get_text(separator='\n').replace('NBSP', '').strip()): 94 continue 95 offer_description += unicodedata.normalize('NFKD', 96 str(specification.get_text(separator='\n').strip())) + "\n" 86 97 87 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,88 color, front_camera, back_camera, chipset, battery, operating_system, cpu,89 image_url,90 offer_url, last_updated, is_validated, offer_description, offer_shop_code))91 i += 2098 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 99 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 100 image_url, 101 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 102 i += 20 92 103 93 for new_offer in new_offers: 94 flag = False 95 flag_price = False 96 offer_id = None 104 for new_offer in new_offers: 105 flag = False 106 flag_price = False 107 offer_id = None 108 109 for old_offer in database_offers: 110 111 if new_offer.offer_name == old_offer.offer_name: 112 flag = True 113 if new_offer.price != old_offer.price: 114 flag_price = True 115 offer_id = old_offer.offer_id 116 117 if flag: 118 # print('ALREADY IN DATABASE') 119 # print(new_offer) 120 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 121 if flag_price: 122 print('PRICE CHANGED!') # CHANGE PRICE 123 print('offer id: ' + str(offer_id)) 124 headers = {'Content-type': 'application/json'} 125 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 126 headers=headers) 127 else: 128 print('ADDED') # ADD OFFER 129 print(new_offer) 130 headers = {'Content-type': 'application/json'} 131 requests.post('http://localhost:8080/phoneoffer/addoffer', 132 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 133 134 print('------------------------------------') 97 135 98 136 for old_offer in database_offers: 137 flag = False 138 for new_offer in new_offers: 139 if old_offer.offer_name == new_offer.offer_name: 140 flag = True 99 141 100 if new_offer.offer_name == old_offer.offer_name: 101 flag = True 102 if new_offer.price != old_offer.price: 103 flag_price = True 104 offer_id = old_offer.offer_id 142 if not flag: 143 print('OFFER DELETED') 144 print(old_offer) 145 # DELETE OFFER 146 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 147 except Exception: 148 traceback.print_exc() 149 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 150 ' VALUES (%s, %s, %s);' 151 insert_value = (offer_shop, last_updated, 'failed') 152 cur.execute(insert_script, insert_value) 153 db_connection.commit() 154 cur.close() 155 db_connection.close() 156 else: 157 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 158 ' VALUES (%s, %s, %s);' 159 insert_value = (offer_shop, last_updated, 'success') 160 cur.execute(insert_script, insert_value) 161 db_connection.commit() 162 cur.close() 163 db_connection.close() 105 164 106 if flag:107 # print('ALREADY IN DATABASE')108 # print(new_offer)109 # if it's already in database, check PRICE and if it's changed, change it !!!!!!110 if flag_price:111 print('PRICE CHANGED!') # CHANGE PRICE112 print('offer id: ' + str(offer_id))113 headers = {'Content-type': 'application/json'}114 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),115 headers=headers)116 else:117 print('ADDED') # ADD OFFER118 print(new_offer)119 headers = {'Content-type': 'application/json'}120 requests.post('http://localhost:8080/phoneoffer/addoffer',121 headers=headers, data=json.dumps(new_offer.__dict__, default=str))122 123 print('------------------------------------')124 125 for old_offer in database_offers:126 flag = False127 for new_offer in new_offers:128 if old_offer.offer_name == new_offer.offer_name:129 flag = True130 131 if not flag:132 print('OFFER DELETED')133 print(old_offer)134 # DELETE OFFER135 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) -
phonelux_scrappers/scrappers/handy_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 20 21 is_validated = False 21 22 22 # Handy phone offers that are already in database 23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/handy').text)) 23 # Call to read the configuration file and connect to database 24 cinfo = config_read.get_databaseconfig("../postgresdb.config") 25 db_connection = psycopg2.connect( 26 database=cinfo[0], 27 host=cinfo[1], 28 user=cinfo[2], 29 password=cinfo[3] 30 ) 31 cur = db_connection.cursor() 24 32 25 database_offers = [] 33 try: 34 # Handy phone offers that are already in database 35 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/handy').text)) 26 36 27 for offer in offers: 28 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 29 offer['ram_memory'], 30 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 31 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 32 offer['image_url'], 33 offer['offer_url'], offer['last_updated'], offer['is_validated'], 34 offer['offer_description'], 35 offer['offer_shop_code']) 36 database_offers.append(phoneOffer) 37 database_offers = [] 37 38 38 new_offers = [] 39 for offer in offers: 40 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 41 offer['ram_memory'], 42 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 43 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 44 offer['image_url'], 45 offer['offer_url'], offer['last_updated'], offer['is_validated'], 46 offer['offer_description'], 47 offer['offer_shop_code']) 48 database_offers.append(phoneOffer) 39 49 40 handy_url = 'https://www.handy.mk/telefoni?page=6' 50 new_offers = [] 41 51 42 response1 = requests.get(handy_url) 43 soup1 = BeautifulSoup(response1.content, 'html.parser') 52 handy_url = 'https://www.handy.mk/telefoni?page=6' 44 53 45 phones = soup1.find_all('li', {'data-hook': 'product-list-grid-item'}) 54 response1 = requests.get(handy_url) 55 soup1 = BeautifulSoup(response1.content, 'html.parser') 46 56 47 for phone in phones: 48 offer_url = phone.find('a').get('href') 49 offer_name = phone.find('div', {'data-hook': 'not-image-container'})\ 50 .find('h3', {'data-hook': 'product-item-name'}).get_text().strip() 51 brand = offer_name.split(' ')[0].capitalize() 52 price = int(float(phone.find('div', {'data-hook': 'not-image-container'}).find('div', {'data-hook': "product-item-product-details"})\ 53 .find('span', {'data-hook': 'product-item-price-to-pay'}).get_text().strip().replace('ден', '').replace('.', '').replace(',', '.'))) 57 phones = soup1.find_all('li', {'data-hook': 'product-list-grid-item'}) 54 58 55 response2 = requests.get(offer_url) 56 soup2 = BeautifulSoup(response2.text, 'html.parser') 59 for phone in phones: 60 offer_url = phone.find('a').get('href') 61 offer_name = phone.find('div', {'data-hook': 'not-image-container'})\ 62 .find('h3', {'data-hook': 'product-item-name'}).get_text().strip() 63 brand = offer_name.split(' ')[0].capitalize() 64 price = int(float(phone.find('div', {'data-hook': 'not-image-container'}).find('div', {'data-hook': "product-item-product-details"})\ 65 .find('span', {'data-hook': 'product-item-price-to-pay'}).get_text().strip().replace('ден', '').replace('.', '').replace(',', '.'))) 57 66 58 back_camera = None 59 operating_system = None 60 chipset = None 61 battery = None 62 ram_memory = None 63 rom_memory = None 64 cpu = None 65 front_camera = None 66 offer_shop_code = None 67 color = None 68 image_url = None 67 response2 = requests.get(offer_url) 68 soup2 = BeautifulSoup(response2.text, 'html.parser') 69 69 70 color_section = soup2.find('section', {'data-hook': 'product-colors-title-section'}) 71 if color_section is not None: 72 temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase3548966286__container'})\ 73 .find_all('input', {'type': 'radio'}) 74 colors_list = [] 75 for temp_color in temp_colors: 76 colors_list.append(temp_color.get('aria-label')) 77 color = ','.join(colors_list) 70 back_camera = None 71 operating_system = None 72 chipset = None 73 battery = None 74 ram_memory = None 75 rom_memory = None 76 cpu = None 77 front_camera = None 78 offer_shop_code = None 79 color = None 80 image_url = None 78 81 79 rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('li') 82 color_section = soup2.find('section', {'data-hook': 'product-colors-title-section'}) 83 if color_section is not None: 84 temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase3548966286__container'})\ 85 .find_all('input', {'type': 'radio'}) 86 colors_list = [] 87 for temp_color in temp_colors: 88 colors_list.append(temp_color.get('aria-label')) 89 color = ','.join(colors_list) 80 90 81 if len(rows) == 0: 82 rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('tr') 91 rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('li') 83 92 84 specifications = [] 93 if len(rows) == 0: 94 rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('tr') 85 95 86 for row in rows: 87 specifications.append(unicodedata.normalize('NFKD', row.get_text().strip())) 96 specifications = [] 88 97 89 offer_description = '\n'.join(specifications) 98 for row in rows: 99 specifications.append(unicodedata.normalize('NFKD', row.get_text().strip())) 90 100 91 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 92 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 93 image_url, 94 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 101 offer_description = '\n'.join(specifications) 95 102 96 for new_offer in new_offers: 97 flag = False 98 flag_price = False 99 offer_id = None 103 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 104 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 105 image_url, 106 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 107 108 for new_offer in new_offers: 109 flag = False 110 flag_price = False 111 offer_id = None 112 113 for old_offer in database_offers: 114 115 if new_offer.offer_name == old_offer.offer_name: 116 flag = True 117 if new_offer.price != old_offer.price: 118 flag_price = True 119 offer_id = old_offer.offer_id 120 121 if flag: 122 # print('ALREADY IN DATABASE') 123 # print(new_offer) 124 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 125 if flag_price: 126 print('PRICE CHANGED!') # CHANGE PRICE 127 print('offer id: ' + str(offer_id)) 128 headers = {'Content-type': 'application/json'} 129 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 130 headers=headers) 131 else: 132 print('ADDED') # ADD OFFER 133 print(new_offer) 134 headers = {'Content-type': 'application/json'} 135 requests.post('http://localhost:8080/phoneoffer/addoffer', 136 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 137 138 print('------------------------------------') 100 139 101 140 for old_offer in database_offers: 141 flag = False 142 for new_offer in new_offers: 143 if old_offer.offer_name == new_offer.offer_name: 144 flag = True 102 145 103 if new_offer.offer_name == old_offer.offer_name: 104 flag = True 105 if new_offer.price != old_offer.price: 106 flag_price = True 107 offer_id = old_offer.offer_id 108 109 if flag: 110 # print('ALREADY IN DATABASE') 111 # print(new_offer) 112 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 113 if flag_price: 114 print('PRICE CHANGED!') # CHANGE PRICE 115 print('offer id: ' + str(offer_id)) 116 headers = {'Content-type': 'application/json'} 117 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 118 headers=headers) 119 else: 120 print('ADDED') # ADD OFFER 121 print(new_offer) 122 headers = {'Content-type': 'application/json'} 123 requests.post('http://localhost:8080/phoneoffer/addoffer', 124 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 125 126 print('------------------------------------') 127 128 for old_offer in database_offers: 129 flag = False 130 for new_offer in new_offers: 131 if old_offer.offer_name == new_offer.offer_name: 132 flag = True 133 134 if not flag: 135 print('OFFER DELETED') 136 print(old_offer) 137 # DELETE OFFER 138 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 146 if not flag: 147 print('OFFER DELETED') 148 print(old_offer) 149 # DELETE OFFER 150 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 151 except Exception: 152 traceback.print_exc() 153 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 154 ' VALUES (%s, %s, %s);' 155 insert_value = (offer_shop, last_updated, 'failed') 156 cur.execute(insert_script, insert_value) 157 db_connection.commit() 158 cur.close() 159 db_connection.close() 160 else: 161 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 162 ' VALUES (%s, %s, %s);' 163 insert_value = (offer_shop, last_updated, 'success') 164 cur.execute(insert_script, insert_value) 165 db_connection.commit() 166 cur.close() 167 db_connection.close() 139 168 140 169 170 -
phonelux_scrappers/scrappers/ledikom_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 19 20 is_validated = False 20 21 21 # Ledikom phone offers that are already in database 22 23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/ledikom').text)) 24 25 database_offers = [] 26 27 for offer in offers: 28 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 29 offer['ram_memory'], 30 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 31 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 32 offer['image_url'], 33 offer['offer_url'], offer['last_updated'], offer['is_validated'], 34 offer['offer_description'], 35 offer['offer_shop_code']) 36 database_offers.append(phoneOffer) 37 38 new_offers = [] 39 40 ledikom_phone_urls = [ 41 'https://ledikom.mk/c/416/uredi/apple/iphone?limit=96', 42 'https://ledikom.mk/c/421/uredi/samsung/telefoni?limit=96', 43 'https://ledikom.mk/c/424/mobilni-telefoni/xiaomi/telefoni?limit=96', 44 'https://ledikom.mk/c/430/uredi/huawei/telefoni?limit=96', 45 'https://ledikom.mk/c/441/uredi/oneplus/telefoni?limit=96', 46 'https://ledikom.mk/c/413/uredi/google/telefoni?limit=96', 47 'https://ledikom.mk/c/411/uredi/honor/telefoni?limit=96', 48 'https://ledikom.mk/c/460/uredi/nokia/telefoni?limit=96', 49 'https://ledikom.mk/c/461/uredi/asus/telefoni?limit=96', 50 'https://ledikom.mk/c/488/proizvodi/oppo/telefoni?limit=96' 51 ] 52 53 for ledikom_url in ledikom_phone_urls: 54 55 # selenium is used because of the dynamic content of the page 56 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver') 57 driver1.get(ledikom_url) 58 ledikom_html = driver1.page_source 59 60 # closing the driver so the safari instance can pair with another webdriver session 61 driver1.close() 62 63 soup1 = BeautifulSoup(ledikom_html, 'html.parser') 64 65 phones = soup1.find('div', {'id': 'content'}) \ 66 .find('div', {'class': 'container'}).find('div', {'class': 'row'}).find('div', {'class': 'item-display'}) \ 67 .find_all('div', {'class': 'item-in-grid'}) 68 69 if len(phones) == 0: 70 continue 71 72 for phone in phones: 73 offer_url = 'https://ledikom.mk' + phone.find('a').get('href') 74 image_url = phone.find('a').find('img').get('src') 75 temp_offer_name = phone.find('div', {'class': 'item-name'}).find('a').get_text().strip() 76 offer_name = ' '.join(temp_offer_name.split()) 77 brand = offer_name.split(' ')[0] 78 price = int(phone.find('span', {'class': 'price'}).get_text().replace('ден.', '') 79 .replace('ден', '') 80 .replace('.', '').strip()) 81 22 # Call to read the configuration file and connect to database 23 cinfo = config_read.get_databaseconfig("../postgresdb.config") 24 db_connection = psycopg2.connect( 25 database=cinfo[0], 26 host=cinfo[1], 27 user=cinfo[2], 28 password=cinfo[3] 29 ) 30 cur = db_connection.cursor() 31 32 try: 33 # Ledikom phone offers that are already in database 34 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/ledikom').text)) 35 36 database_offers = [] 37 38 for offer in offers: 39 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 40 offer['ram_memory'], 41 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 42 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 43 offer['image_url'], 44 offer['offer_url'], offer['last_updated'], offer['is_validated'], 45 offer['offer_description'], 46 offer['offer_shop_code']) 47 database_offers.append(phoneOffer) 48 49 new_offers = [] 50 51 ledikom_phone_urls = [ 52 'https://ledikom.mk/c/416/uredi/apple/iphone?limit=96', 53 'https://ledikom.mk/c/421/uredi/samsung/telefoni?limit=96', 54 'https://ledikom.mk/c/424/mobilni-telefoni/xiaomi/telefoni?limit=96', 55 'https://ledikom.mk/c/430/uredi/huawei/telefoni?limit=96', 56 'https://ledikom.mk/c/441/uredi/oneplus/telefoni?limit=96', 57 'https://ledikom.mk/c/413/uredi/google/telefoni?limit=96', 58 'https://ledikom.mk/c/411/uredi/honor/telefoni?limit=96', 59 'https://ledikom.mk/c/460/uredi/nokia/telefoni?limit=96', 60 'https://ledikom.mk/c/461/uredi/asus/telefoni?limit=96', 61 'https://ledikom.mk/c/488/proizvodi/oppo/telefoni?limit=96' 62 ] 63 64 for ledikom_url in ledikom_phone_urls: 65 66 # selenium is used because of the dynamic content of the page 82 67 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver') 83 driver1.get(offer_url) 84 # getting offer page html 85 offer_html = driver1.page_source 68 driver1.get(ledikom_url) 69 ledikom_html = driver1.page_source 70 71 # closing the driver so the safari instance can pair with another webdriver session 86 72 driver1.close() 87 73 88 soup2 = BeautifulSoup(offer_html, 'html.parser') 89 90 specifications = soup2.find('div', {'id': 'content'}).find('section', {'class': 'padding-section'}) \ 91 .find_all('div', {'class': 'container'})[1].find('div', {'class': 'col-md-7'}) \ 92 .find_all('div', {'class': 'row'}) 93 94 color = None 95 rom_memory = None 96 ram_memory = None 97 back_camera = None 98 operating_system = None 99 chipset = None 100 battery = None 101 cpu = None 102 front_camera = None 103 offer_shop_code = None 104 offer_description = None 105 106 if len(specifications) != 0: 107 colors_tags = specifications[0].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a') 108 temp_colors = [] 109 for color_tag in colors_tags: 110 temp_colors.append(color_tag.get_text().strip()) 111 color = ','.join(temp_colors) 112 113 if len(specifications) >= 2: 114 temp_rom = specifications[1].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a') 115 rom_list = [] 116 for rom in temp_rom: 117 rom_list.append(rom.get('title')) 118 rom_memory = ','.join(rom_list) 119 120 if len(specifications) >= 3: 121 temp_ram = specifications[2].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a') 122 ram_list = [] 123 for ram in temp_ram: 124 ram_list.append(ram.get('title')) 125 126 ram_memory = ','.join(ram_list) 127 128 if 'Xiaomi' in brand: 129 temp = color 130 color = rom_memory 131 rom_memory = temp 132 133 temp = ram_memory 134 ram_memory = color 135 color = temp 136 137 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 138 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 139 image_url, 140 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 141 142 for new_offer in new_offers: 143 flag = False 144 flag_price = False 145 offer_id = None 74 soup1 = BeautifulSoup(ledikom_html, 'html.parser') 75 76 phones = soup1.find('div', {'id': 'content'}) \ 77 .find('div', {'class': 'container'}).find('div', {'class': 'row'}).find('div', {'class': 'item-display'}) \ 78 .find_all('div', {'class': 'item-in-grid'}) 79 80 if len(phones) == 0: 81 continue 82 83 for phone in phones: 84 offer_url = 'https://ledikom.mk' + phone.find('a').get('href') 85 image_url = phone.find('a').find('img').get('src') 86 temp_offer_name = phone.find('div', {'class': 'item-name'}).find('a').get_text().strip() 87 offer_name = ' '.join(temp_offer_name.split()) 88 brand = offer_name.split(' ')[0] 89 price = int(phone.find('span', {'class': 'price'}).get_text().replace('ден.', '') 90 .replace('ден', '') 91 .replace('.', '').strip()) 92 93 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver') 94 driver1.get(offer_url) 95 # getting offer page html 96 offer_html = driver1.page_source 97 driver1.close() 98 99 soup2 = BeautifulSoup(offer_html, 'html.parser') 100 101 specifications = soup2.find('div', {'id': 'content'}).find('section', {'class': 'padding-section'}) \ 102 .find_all('div', {'class': 'container'})[1].find('div', {'class': 'col-md-7'}) \ 103 .find_all('div', {'class': 'row'}) 104 105 color = None 106 rom_memory = None 107 ram_memory = None 108 back_camera = None 109 operating_system = None 110 chipset = None 111 battery = None 112 cpu = None 113 front_camera = None 114 offer_shop_code = None 115 offer_description = None 116 117 if len(specifications) != 0: 118 colors_tags = specifications[0].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a') 119 temp_colors = [] 120 for color_tag in colors_tags: 121 temp_colors.append(color_tag.get_text().strip()) 122 color = ','.join(temp_colors) 123 124 if len(specifications) >= 2: 125 temp_rom = specifications[1].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a') 126 rom_list = [] 127 for rom in temp_rom: 128 rom_list.append(rom.get('title')) 129 rom_memory = ','.join(rom_list) 130 131 if len(specifications) >= 3: 132 temp_ram = specifications[2].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a') 133 ram_list = [] 134 for ram in temp_ram: 135 ram_list.append(ram.get('title')) 136 137 ram_memory = ','.join(ram_list) 138 139 if 'Xiaomi' in brand: 140 temp = color 141 color = rom_memory 142 rom_memory = temp 143 144 temp = ram_memory 145 ram_memory = color 146 color = temp 147 148 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 149 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 150 image_url, 151 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 152 153 for new_offer in new_offers: 154 flag = False 155 flag_price = False 156 offer_id = None 157 158 for old_offer in database_offers: 159 160 if new_offer.offer_name == old_offer.offer_name: 161 flag = True 162 if new_offer.price != old_offer.price: 163 flag_price = True 164 offer_id = old_offer.offer_id 165 166 if flag: 167 # print('ALREADY IN DATABASE') 168 # print(new_offer) 169 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 170 if flag_price: 171 print('PRICE CHANGED!') # CHANGE PRICE 172 print('offer id: ' + str(offer_id)) 173 headers = {'Content-type': 'application/json'} 174 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 175 headers=headers) 176 else: 177 print('ADDED') # ADD OFFER 178 print(new_offer) 179 headers = {'Content-type': 'application/json'} 180 requests.post('http://localhost:8080/phoneoffer/addoffer', 181 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 182 183 print('------------------------------------') 146 184 147 185 for old_offer in database_offers: 148 149 if new_offer.offer_name == old_offer.offer_name: 150 flag = True 151 if new_offer.price != old_offer.price: 152 flag_price = True 153 offer_id = old_offer.offer_id 154 155 if flag: 156 # print('ALREADY IN DATABASE') 157 # print(new_offer) 158 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 159 if flag_price: 160 print('PRICE CHANGED!') # CHANGE PRICE 161 print('offer id: ' + str(offer_id)) 162 headers = {'Content-type': 'application/json'} 163 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 164 headers=headers) 165 else: 166 print('ADDED') # ADD OFFER 167 print(new_offer) 168 headers = {'Content-type': 'application/json'} 169 requests.post('http://localhost:8080/phoneoffer/addoffer', 170 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 171 172 print('------------------------------------') 173 174 for old_offer in database_offers: 175 flag = False 176 for new_offer in new_offers: 177 if old_offer.offer_name == new_offer.offer_name: 178 flag = True 179 180 if not flag: 181 print('OFFER DELETED') 182 print(old_offer) 183 # DELETE OFFER 184 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 186 flag = False 187 for new_offer in new_offers: 188 if old_offer.offer_name == new_offer.offer_name: 189 flag = True 190 191 if not flag: 192 print('OFFER DELETED') 193 print(old_offer) 194 # DELETE OFFER 195 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 196 except Exception: 197 traceback.print_exc() 198 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 199 ' VALUES (%s, %s, %s);' 200 insert_value = (offer_shop, last_updated, 'failed') 201 cur.execute(insert_script, insert_value) 202 db_connection.commit() 203 cur.close() 204 db_connection.close() 205 else: 206 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 207 ' VALUES (%s, %s, %s);' 208 insert_value = (offer_shop, last_updated, 'success') 209 cur.execute(insert_script, insert_value) 210 db_connection.commit() 211 cur.close() 212 db_connection.close() 213 -
phonelux_scrappers/scrappers/mobelix_scrapper.py
rffd50db r47f4eaf 3 3 import unicodedata 4 4 from datetime import datetime 5 5 import traceback 6 6 import psycopg2 7 7 import config_read … … 19 19 is_validated = False 20 20 21 # Mobelix phone offers that are already in database 22 23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobelix').text)) 24 25 database_offers = [] 26 27 for offer in offers: 28 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 29 offer['ram_memory'], 30 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 31 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 32 offer['image_url'], 33 offer['offer_url'], offer['last_updated'], offer['is_validated'], 34 offer['offer_description'], 35 offer['offer_shop_code']) 36 database_offers.append(phoneOffer) 37 38 new_offers = [] 39 40 for i in range(1, 17): 41 mobelix_url = "https://mobelix.com.mk/mk/mobilni-telefoni?page=" + str(i) 42 43 response1 = requests.get(mobelix_url) 44 soup1 = BeautifulSoup(response1.content, 'html.parser') 45 46 phones = soup1.find_all('div', {'class': 'p-2 rounded text-dark bg-white d-flex w-100'}) 47 48 for phone in phones: 49 offer_url = phone.find('a').get('href') 50 image_url = phone.find_all('div', {'class': 'col-12'})[0].find('img').get('src') 51 brand = phone.find_all('div', {'class': 'col-12'})[1].find('h5', {'class': 'mb-0'}).get_text().strip() 52 offer_name = phone.find_all('div', {'class': 'col-12'})[1] \ 53 .find('h3', {'class': 'h5 font-weight-normal'}).get_text().strip() 54 55 if 'Watch' in offer_name or 'Pad' in offer_name or 'Tab' in offer_name or 'Pods' in offer_name or 'Buds' in offer_name or 'HomePod' in offer_name: 56 continue 57 58 if brand not in offer_name: 59 offer_name = brand + " " + offer_name 60 61 temp_prices = phone.find_all('div', {'class': 'col-12'})[1] \ 62 .find('p', {'class': 'h5 price'}).get_text(separator='/').strip() 63 64 if len(temp_prices.split('/')) > 1: 65 price = int(float(temp_prices.split('/')[1].replace(',', '').replace('ден', '').strip())) 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 30 31 try: 32 # Mobelix phone offers that are already in database 33 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobelix').text)) 34 35 database_offers = [] 36 37 for offer in offers: 38 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 39 offer['ram_memory'], 40 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 41 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 42 offer['image_url'], 43 offer['offer_url'], offer['last_updated'], offer['is_validated'], 44 offer['offer_description'], 45 offer['offer_shop_code']) 46 database_offers.append(phoneOffer) 47 48 new_offers = [] 49 50 for i in range(1, 17): 51 mobelix_url = "https://mobelix.com.mk/mk/mobilni-telefoni?page=" + str(i) 52 53 response1 = requests.get(mobelix_url) 54 soup1 = BeautifulSoup(response1.content, 'html.parser') 55 56 phones = soup1.find_all('div', {'class': 'p-2 rounded text-dark bg-white d-flex w-100'}) 57 58 for phone in phones: 59 offer_url = phone.find('a').get('href') 60 image_url = phone.find_all('div', {'class': 'col-12'})[0].find('img').get('src') 61 brand = phone.find_all('div', {'class': 'col-12'})[1].find('h5', {'class': 'mb-0'}).get_text().strip() 62 offer_name = phone.find_all('div', {'class': 'col-12'})[1] \ 63 .find('h3', {'class': 'h5 font-weight-normal'}).get_text().strip() 64 65 if 'Watch' in offer_name or 'Pad' in offer_name or 'Tab' in offer_name or 'Pods' in offer_name or 'Buds' in offer_name or 'HomePod' in offer_name: 66 continue 67 68 if brand not in offer_name: 69 offer_name = brand + " " + offer_name 70 71 temp_prices = phone.find_all('div', {'class': 'col-12'})[1] \ 72 .find('p', {'class': 'h5 price'}).get_text(separator='/').strip() 73 74 if len(temp_prices.split('/')) > 1: 75 price = int(float(temp_prices.split('/')[1].replace(',', '').replace('ден', '').strip())) 76 else: 77 price = int(float(temp_prices.split('/')[0].replace(',', '').replace('ден', '').strip())) 78 79 response2 = requests.get(offer_url) 80 soup2 = BeautifulSoup(response2.content, 'html.parser') 81 82 colors_divs = soup2.find('div', {'class': 'color-wrapper mt-2 mb-1'}) \ 83 .find_all('div', {'class': 'color-box d-inline-block'}) # color div tags 84 85 temp_colors = [] 86 for div in colors_divs: 87 temp_colors.append(div.get('title')) 88 89 color = ",".join(temp_colors) # available colors for offer 90 91 tables = soup2.find('div', {'class': 'mobelix-specs table-white bordered-table'}).find_all('table') 92 93 operating_system = None 94 chipset = None 95 battery = None 96 ram_memory = None 97 rom_memory = None 98 front_camera = '' 99 back_camera = '' 100 cpu = None 101 offer_shop_code = None 102 offer_description = None 103 104 for table in tables: 105 for cell in table.find_all('td'): 106 if cell.get('data-spec') is None: 107 continue 108 109 if cell.get('data-spec') == 'os': 110 operating_system = unicodedata.normalize('NFKD', cell.get_text().strip()) 111 112 if cell.get('data-spec') == 'chipset': 113 chipset = unicodedata.normalize('NFKD', cell.get_text().strip()) 114 115 if cell.get('data-spec') == 'cpu': 116 cpu = unicodedata.normalize('NFKD', cell.get_text().strip()) 117 118 if cell.get('data-spec') == 'internalmemory': 119 temp_rom = [] 120 temp_ram = [] 121 temp_internalmemory = unicodedata.normalize('NFKD', cell.get_text().strip()) 122 for internalmemory in temp_internalmemory.split(','): 123 temp_rom.append(internalmemory.strip().split(' ')[0]) 124 if len(internalmemory.strip().split(' ')) > 1: 125 temp_ram.append(internalmemory.strip().split(' ')[1]) 126 rom_memory = ','.join(temp_rom) 127 ram_memory = ','.join(temp_ram) 128 129 if cell.get('data-spec') == 'cam1modules' or cell.get('data-spec') == 'cam1features' or cell.get( 130 'data-spec') == 'cam1video': 131 back_camera += unicodedata.normalize('NFKD', cell.get_text().strip()) + '\n' 132 133 if cell.get('data-spec') == 'cam2modules' or cell.get('data-spec') == 'cam2features' or cell.get( 134 'data-spec') == 'cam2video': 135 front_camera += unicodedata.normalize('NFKD', cell.get_text().strip()) + '\n' 136 137 if cell.get('data-spec') == 'batdescription1': 138 battery = unicodedata.normalize('NFKD', cell.get_text().strip()) 139 140 if front_camera == 'No': 141 front_camera = None 142 143 if back_camera == 'No': 144 back_camera = None 145 146 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 147 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 148 image_url, 149 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 150 151 152 for new_offer in new_offers: 153 flag = False 154 flag_price = False 155 offer_id = None 156 157 for old_offer in database_offers: 158 159 if new_offer.offer_name == old_offer.offer_name: 160 flag = True 161 if new_offer.price != old_offer.price: 162 flag_price = True 163 offer_id = old_offer.offer_id 164 165 if flag: 166 # print('ALREADY IN DATABASE') 167 # print(new_offer) 168 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 169 if flag_price: 170 print('PRICE CHANGED!') # CHANGE PRICE 171 print('offer id: ' + str(offer_id)) 172 headers = {'Content-type': 'application/json'} 173 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 174 headers=headers) 66 175 else: 67 price = int(float(temp_prices.split('/')[0].replace(',', '').replace('ден', '').strip())) 68 69 response2 = requests.get(offer_url) 70 soup2 = BeautifulSoup(response2.content, 'html.parser') 71 72 colors_divs = soup2.find('div', {'class': 'color-wrapper mt-2 mb-1'}) \ 73 .find_all('div', {'class': 'color-box d-inline-block'}) # color div tags 74 75 temp_colors = [] 76 for div in colors_divs: 77 temp_colors.append(div.get('title')) 78 79 color = ",".join(temp_colors) # available colors for offer 80 81 tables = soup2.find('div', {'class': 'mobelix-specs table-white bordered-table'}).find_all('table') 82 83 operating_system = None 84 chipset = None 85 battery = None 86 ram_memory = None 87 rom_memory = None 88 front_camera = '' 89 back_camera = '' 90 cpu = None 91 offer_shop_code = None 92 offer_description = None 93 94 for table in tables: 95 for cell in table.find_all('td'): 96 if cell.get('data-spec') is None: 97 continue 98 99 if cell.get('data-spec') == 'os': 100 operating_system = unicodedata.normalize('NFKD', cell.get_text().strip()) 101 102 if cell.get('data-spec') == 'chipset': 103 chipset = unicodedata.normalize('NFKD', cell.get_text().strip()) 104 105 if cell.get('data-spec') == 'cpu': 106 cpu = unicodedata.normalize('NFKD', cell.get_text().strip()) 107 108 if cell.get('data-spec') == 'internalmemory': 109 temp_rom = [] 110 temp_ram = [] 111 temp_internalmemory = unicodedata.normalize('NFKD', cell.get_text().strip()) 112 for internalmemory in temp_internalmemory.split(','): 113 temp_rom.append(internalmemory.strip().split(' ')[0]) 114 if len(internalmemory.strip().split(' ')) > 1: 115 temp_ram.append(internalmemory.strip().split(' ')[1]) 116 rom_memory = ','.join(temp_rom) 117 ram_memory = ','.join(temp_ram) 118 119 if cell.get('data-spec') == 'cam1modules' or cell.get('data-spec') == 'cam1features' or cell.get( 120 'data-spec') == 'cam1video': 121 back_camera += unicodedata.normalize('NFKD', cell.get_text().strip()) + '\n' 122 123 if cell.get('data-spec') == 'cam2modules' or cell.get('data-spec') == 'cam2features' or cell.get( 124 'data-spec') == 'cam2video': 125 front_camera += unicodedata.normalize('NFKD', cell.get_text().strip()) + '\n' 126 127 if cell.get('data-spec') == 'batdescription1': 128 battery = unicodedata.normalize('NFKD', cell.get_text().strip()) 129 130 if front_camera == 'No': 131 front_camera = None 132 133 if back_camera == 'No': 134 back_camera = None 135 136 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 137 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 138 image_url, 139 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 140 141 142 for new_offer in new_offers: 143 flag = False 144 flag_price = False 145 offer_id = None 176 print('ADDED') # ADD OFFER 177 print(new_offer) 178 headers = {'Content-type': 'application/json'} 179 requests.post('http://localhost:8080/phoneoffer/addoffer', 180 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 181 182 print('------------------------------------') 146 183 147 184 for old_offer in database_offers: 148 149 if new_offer.offer_name == old_offer.offer_name: 150 flag = True 151 if new_offer.price != old_offer.price: 152 flag_price = True 153 offer_id = old_offer.offer_id 154 155 if flag: 156 # print('ALREADY IN DATABASE') 157 # print(new_offer) 158 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 159 if flag_price: 160 print('PRICE CHANGED!') # CHANGE PRICE 161 print('offer id: ' + str(offer_id)) 162 headers = {'Content-type': 'application/json'} 163 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 164 headers=headers) 165 else: 166 print('ADDED') # ADD OFFER 167 print(new_offer) 168 headers = {'Content-type': 'application/json'} 169 requests.post('http://localhost:8080/phoneoffer/addoffer', 170 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 171 172 print('------------------------------------') 173 174 for old_offer in database_offers: 175 flag = False 176 for new_offer in new_offers: 177 if old_offer.offer_name == new_offer.offer_name: 178 flag = True 179 180 if not flag: 181 print('OFFER DELETED') 182 print(old_offer) 183 # DELETE OFFER 184 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 185 flag = False 186 for new_offer in new_offers: 187 if old_offer.offer_name == new_offer.offer_name: 188 flag = True 189 190 if not flag: 191 print('OFFER DELETED') 192 print(old_offer) 193 # DELETE OFFER 194 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 195 except Exception: 196 traceback.print_exc() 197 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 198 ' VALUES (%s, %s, %s);' 199 insert_value = (offer_shop, last_updated, 'failed') 200 cur.execute(insert_script, insert_value) 201 db_connection.commit() 202 cur.close() 203 db_connection.close() 204 else: 205 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 206 ' VALUES (%s, %s, %s);' 207 insert_value = (offer_shop, last_updated, 'success') 208 cur.execute(insert_script, insert_value) 209 db_connection.commit() 210 cur.close() 211 db_connection.close() 212 -
phonelux_scrappers/scrappers/mobigo_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 18 19 is_validated = False 19 20 20 # Mobi Go phone offers that are already in database 21 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobigo').text)) 23 24 database_offers = [] 25 26 for offer in offers: 27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 28 offer['ram_memory'], 29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 31 offer['image_url'], 32 offer['offer_url'], offer['last_updated'], offer['is_validated'], 33 offer['offer_description'], 34 offer['offer_shop_code']) 35 database_offers.append(phoneOffer) 36 37 new_offers = [] 38 39 40 for i in range(1, 6): 41 mobigo_url = "https://mobigo.mk/page/" + str(i) + "/" 42 43 response1 = requests.get(mobigo_url) 44 45 soup1 = BeautifulSoup(response1.content, 'html.parser') 46 47 phone_sections = soup1.find_all('ul', {'class': 'recent-posts'}) 48 phones = phone_sections[len(phone_sections) - 1].find_all('li') 49 50 for phone in phones: 51 offer_url = phone.find('div', {'class', 'post-thumb'}).find('a').get('href') # offer url 52 image_url = phone.find('div', {'class', 'post-thumb'}).find('a').find('img').get('src') # image url 53 offer_name = phone.find('div', {'class', 'post-content'}).find_all('h2')[0].get_text().strip() # offer_name 54 55 if "Watch" in offer_name or "Tab" in offer_name: # if the product is watch or tablet, continue 56 continue 57 58 price = int(float(phone.find('div', {'class', 'post-content'}).find_all('h2')[1] \ 59 .get_text().replace('ден.', '').replace('.', '').strip())) # price 60 61 response2 = requests.get(offer_url) 62 soup2 = BeautifulSoup(response2.content, 'html.parser') 63 64 brand = soup2.find('a', {'rel': 'category tag'}).get_text().strip() # brand 65 66 if brand not in offer_name: 67 offer_name = brand + " " + offer_name 68 69 specifications = soup2.find('table', {'id': 'singlet'}).find_all('tr') 70 71 ram_memory = None 72 rom_memory = None 73 battery = None 74 back_camera = None 75 front_camera = None 76 chipset = None 77 operating_system = None 78 cpu = None 79 offer_shop_code = None 80 offer_description = None 81 color = None 82 83 for specification in specifications: 84 if specification.find('td') == None: 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 30 31 try: 32 # Mobi Go phone offers that are already in database 33 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobigo').text)) 34 35 database_offers = [] 36 37 for offer in offers: 38 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 39 offer['ram_memory'], 40 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 41 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 42 offer['image_url'], 43 offer['offer_url'], offer['last_updated'], offer['is_validated'], 44 offer['offer_description'], 45 offer['offer_shop_code']) 46 database_offers.append(phoneOffer) 47 48 new_offers = [] 49 50 51 for i in range(1, 6): 52 mobigo_url = "https://mobigo.mk/page/" + str(i) + "/" 53 54 response1 = requests.get(mobigo_url) 55 56 soup1 = BeautifulSoup(response1.content, 'html.parser') 57 58 phone_sections = soup1.find_all('ul', {'class': 'recent-posts'}) 59 phones = phone_sections[len(phone_sections) - 1].find_all('li') 60 61 for phone in phones: 62 offer_url = phone.find('div', {'class', 'post-thumb'}).find('a').get('href') # offer url 63 image_url = phone.find('div', {'class', 'post-thumb'}).find('a').find('img').get('src') # image url 64 offer_name = phone.find('div', {'class', 'post-content'}).find_all('h2')[0].get_text().strip() # offer_name 65 66 if "Watch" in offer_name or "Tab" in offer_name: # if the product is watch or tablet, continue 85 67 continue 86 68 87 # operating system 88 if specification.find('td').get_text() == "Платформа": 89 if specification.find('i').get_text() != "/": 90 operating_system = specification.find('i').get_text().strip() 91 else: 92 operating_system = None 93 94 # chipset 95 if specification.find('td').get_text() == "Chipset": 96 if specification.find('i').get_text() != "/": 97 chipset = specification.find('i').get_text().strip() 98 else: 99 chipset = None 100 101 # ram and rom memory 102 if specification.find('td').get_text() == "Меморија": 103 if specification.find('i').get_text() != "/": 104 rom_memory = specification.find('i').get_text().replace(',', '').split(' ')[0].strip() 105 ram_memory = specification.find('i').get_text().replace(',', '').split(' ')[1].strip() 106 else: 107 rom_memory = None 108 ram_memory = None 109 110 # back camera 111 if specification.find('td').get_text() == "Главна Камера": 112 if specification.find('i').get_text() != "/": 113 back_camera = specification.find('i').get_text().strip() 114 else: 115 back_camera = None 116 117 # front camera 118 if specification.find('td').get_text() == "Селфи Камера": 119 if specification.find('i').get_text() != "/": 120 front_camera = specification.find('i').get_text().strip() 121 else: 122 front_camera = None 123 124 # battery 125 if specification.find('td').get_text() == "Батерија": 126 if specification.find('i').get_text() != "/": 127 battery = specification.find('i').get_text().strip() 128 else: 129 battery = None 130 131 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 132 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 133 image_url, 134 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 135 136 137 for new_offer in new_offers: 138 flag = False 139 flag_price = False 140 offer_id = None 69 price = int(float(phone.find('div', {'class', 'post-content'}).find_all('h2')[1] \ 70 .get_text().replace('ден.', '').replace('.', '').strip())) # price 71 72 response2 = requests.get(offer_url) 73 soup2 = BeautifulSoup(response2.content, 'html.parser') 74 75 brand = soup2.find('a', {'rel': 'category tag'}).get_text().strip() # brand 76 77 if brand not in offer_name: 78 offer_name = brand + " " + offer_name 79 80 specifications = soup2.find('table', {'id': 'singlet'}).find_all('tr') 81 82 ram_memory = None 83 rom_memory = None 84 battery = None 85 back_camera = None 86 front_camera = None 87 chipset = None 88 operating_system = None 89 cpu = None 90 offer_shop_code = None 91 offer_description = None 92 color = None 93 94 for specification in specifications: 95 if specification.find('td') == None: 96 continue 97 98 # operating system 99 if specification.find('td').get_text() == "Платформа": 100 if specification.find('i').get_text() != "/": 101 operating_system = specification.find('i').get_text().strip() 102 else: 103 operating_system = None 104 105 # chipset 106 if specification.find('td').get_text() == "Chipset": 107 if specification.find('i').get_text() != "/": 108 chipset = specification.find('i').get_text().strip() 109 else: 110 chipset = None 111 112 # ram and rom memory 113 if specification.find('td').get_text() == "Меморија": 114 if specification.find('i').get_text() != "/": 115 rom_memory = specification.find('i').get_text().replace(',', '').split(' ')[0].strip() 116 ram_memory = specification.find('i').get_text().replace(',', '').split(' ')[1].strip() 117 else: 118 rom_memory = None 119 ram_memory = None 120 121 # back camera 122 if specification.find('td').get_text() == "Главна Камера": 123 if specification.find('i').get_text() != "/": 124 back_camera = specification.find('i').get_text().strip() 125 else: 126 back_camera = None 127 128 # front camera 129 if specification.find('td').get_text() == "Селфи Камера": 130 if specification.find('i').get_text() != "/": 131 front_camera = specification.find('i').get_text().strip() 132 else: 133 front_camera = None 134 135 # battery 136 if specification.find('td').get_text() == "Батерија": 137 if specification.find('i').get_text() != "/": 138 battery = specification.find('i').get_text().strip() 139 else: 140 battery = None 141 142 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 143 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 144 image_url, 145 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 146 147 148 for new_offer in new_offers: 149 flag = False 150 flag_price = False 151 offer_id = None 152 153 for old_offer in database_offers: 154 155 if new_offer.offer_name == old_offer.offer_name: 156 flag = True 157 if new_offer.price != old_offer.price: 158 flag_price = True 159 offer_id = old_offer.offer_id 160 161 if flag: 162 print('ALREADY IN DATABASE') 163 print(new_offer) 164 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 165 if flag_price: 166 print('PRICE CHANGED!') # CHANGE PRICE 167 print('offer id: ' + str(offer_id)) 168 headers = {'Content-type': 'application/json'} 169 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 170 headers=headers) 171 else: 172 print('ADDED') # ADD OFFER 173 print(new_offer) 174 headers = {'Content-type': 'application/json'} 175 requests.post('http://localhost:8080/phoneoffer/addoffer', 176 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 177 178 print('------------------------------------') 141 179 142 180 for old_offer in database_offers: 143 144 if new_offer.offer_name == old_offer.offer_name: 145 flag = True 146 if new_offer.price != old_offer.price: 147 flag_price = True 148 offer_id = old_offer.offer_id 149 150 if flag: 151 print('ALREADY IN DATABASE') 152 print(new_offer) 153 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 154 if flag_price: 155 print('PRICE CHANGED!') # CHANGE PRICE 156 print('offer id: ' + str(offer_id)) 157 headers = {'Content-type': 'application/json'} 158 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 159 headers=headers) 160 else: 161 print('ADDED') # ADD OFFER 162 print(new_offer) 163 headers = {'Content-type': 'application/json'} 164 requests.post('http://localhost:8080/phoneoffer/addoffer', 165 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 166 167 print('------------------------------------') 168 169 for old_offer in database_offers: 170 flag = False 171 for new_offer in new_offers: 172 if old_offer.offer_name == new_offer.offer_name: 173 flag = True 174 175 if not flag: 176 print('OFFER DELETED') 177 print(old_offer) 178 # DELETE OFFER 179 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 181 flag = False 182 for new_offer in new_offers: 183 if old_offer.offer_name == new_offer.offer_name: 184 flag = True 185 186 if not flag: 187 print('OFFER DELETED') 188 print(old_offer) 189 # DELETE OFFER 190 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 191 except Exception: 192 traceback.print_exc() 193 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 194 ' VALUES (%s, %s, %s);' 195 insert_value = (offer_shop, last_updated, 'failed') 196 cur.execute(insert_script, insert_value) 197 db_connection.commit() 198 cur.close() 199 db_connection.close() 200 else: 201 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 202 ' VALUES (%s, %s, %s);' 203 insert_value = (offer_shop, last_updated, 'success') 204 cur.execute(insert_script, insert_value) 205 db_connection.commit() 206 cur.close() 207 db_connection.close() -
phonelux_scrappers/scrappers/mobilezone_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 18 19 is_validated = False 19 20 20 # Mobile Zone phone offers that are already in database 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 21 30 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobilezone').text)) 31 try: 32 # Mobile Zone phone offers that are already in database 33 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobilezone').text)) 23 34 24 database_offers = []35 database_offers = [] 25 36 26 for offer in offers:27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],28 offer['ram_memory'],29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],31 offer['image_url'],32 offer['offer_url'], offer['last_updated'], offer['is_validated'],33 offer['offer_description'],34 offer['offer_shop_code'])35 database_offers.append(phoneOffer)37 for offer in offers: 38 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 39 offer['ram_memory'], 40 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 41 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 42 offer['image_url'], 43 offer['offer_url'], offer['last_updated'], offer['is_validated'], 44 offer['offer_description'], 45 offer['offer_shop_code']) 46 database_offers.append(phoneOffer) 36 47 37 new_offers = []48 new_offers = [] 38 49 39 for i in range(1, 3):40 mobilezone_url = 'https://mobilezone.mk/produkt-kategorija/telefoni/novi-telefoni/page/' + str(i) + '/'50 for i in range(1, 3): 51 mobilezone_url = 'https://mobilezone.mk/produkt-kategorija/telefoni/novi-telefoni/page/' + str(i) + '/' 41 52 42 response1 = requests.get(mobilezone_url)43 soup1 = BeautifulSoup(response1.content, 'html.parser')53 response1 = requests.get(mobilezone_url) 54 soup1 = BeautifulSoup(response1.content, 'html.parser') 44 55 45 phones = soup1.find('ul', {46 'class': 'products columns-tablet-2 columns-mobile-2 --skin-proto rey-wcGap-default rey-wcGrid-default '47 '--paginated columns-4'}).find_all('li')56 phones = soup1.find('ul', { 57 'class': 'products columns-tablet-2 columns-mobile-2 --skin-proto rey-wcGap-default rey-wcGrid-default ' 58 '--paginated columns-4'}).find_all('li') 48 59 49 for phone in phones:50 offer_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}).get(51 'href')52 image_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}) \53 .find('img').get('data-lazy-src')60 for phone in phones: 61 offer_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}).get( 62 'href') 63 image_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}) \ 64 .find('img').get('data-lazy-src') 54 65 55 brand_section = phone.find('div', {'class': 'rey-productInner'}).find('div', {'class': 'rey-brandLink'})66 brand_section = phone.find('div', {'class': 'rey-productInner'}).find('div', {'class': 'rey-brandLink'}) 56 67 57 if brand_section is not None: 58 brand = brand_section.find('a').get_text().strip() 68 if brand_section is not None: 69 brand = brand_section.find('a').get_text().strip() 70 else: 71 brand = None 72 73 offer_name = phone.find('h2', {'class': 'woocommerce-loop-product__title'}).find('a').get_text().strip() 74 75 if brand is not None and brand not in offer_name: 76 offer_name = brand + ' ' + offer_name 77 78 price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'}) 79 price = None 80 81 if price_tag is not None: 82 price = int(unicodedata.normalize('NFKD', price_tag.find('bdi').get_text() 83 .replace(',', '') 84 .replace('ден', '').strip())) 85 else: 86 continue 87 88 response2 = requests.get(offer_url) 89 soup2 = BeautifulSoup(response2.text, 'html.parser') 90 91 specifications = soup2.find('table', {'class': 'woocommerce-product-attributes shop_attributes'}).find_all('tr') 92 93 back_camera = None 94 front_camera = None 95 rom_memory = None 96 ram_memory = None 97 operating_system = None 98 cpu = None 99 chipset = None 100 offer_description = None 101 offer_shop_code = None 102 battery = None 103 color = None 104 105 for specification in specifications: 106 if 'Главна камера' in specification.find('th').get_text(): 107 back_camera = specification.find('td').get_text().strip() 108 109 if 'Селфи камера' in specification.find('th').get_text(): 110 front_camera = specification.find('td').get_text().strip() 111 112 if 'Батерија' in specification.find('th').get_text(): 113 battery = specification.find('td').get_text().strip() 114 115 if 'Меморија' in specification.find('th').get_text(): 116 rom_memory = specification.find('td').get_text().strip() 117 118 if 'Боја' in specification.find('th').get_text(): 119 color = specification.find('td').get_text().strip() 120 121 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 122 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 123 image_url, 124 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 125 126 for new_offer in new_offers: 127 flag = False 128 flag_price = False 129 offer_id = None 130 131 for old_offer in database_offers: 132 133 if new_offer.offer_name == old_offer.offer_name: 134 flag = True 135 if new_offer.price != old_offer.price: 136 flag_price = True 137 offer_id = old_offer.offer_id 138 139 if flag: 140 # print('ALREADY IN DATABASE') 141 # print(new_offer) 142 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 143 if flag_price: 144 print('PRICE CHANGED!') # CHANGE PRICE 145 print('offer id: ' + str(offer_id)) 146 headers = {'Content-type': 'application/json'} 147 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 148 headers=headers) 59 149 else: 60 brand = None 150 print('ADDED') # ADD OFFER 151 print(new_offer) 152 headers = {'Content-type': 'application/json'} 153 requests.post('http://localhost:8080/phoneoffer/addoffer', 154 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 61 155 62 offer_name = phone.find('h2', {'class': 'woocommerce-loop-product__title'}).find('a').get_text().strip() 63 64 if brand is not None and brand not in offer_name: 65 offer_name = brand + ' ' + offer_name 66 67 price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'}) 68 price = None 69 70 if price_tag is not None: 71 price = int(unicodedata.normalize('NFKD', price_tag.find('bdi').get_text() 72 .replace(',', '') 73 .replace('ден', '').strip())) 74 else: 75 continue 76 77 response2 = requests.get(offer_url) 78 soup2 = BeautifulSoup(response2.text, 'html.parser') 79 80 specifications = soup2.find('table', {'class': 'woocommerce-product-attributes shop_attributes'}).find_all('tr') 81 82 back_camera = None 83 front_camera = None 84 rom_memory = None 85 ram_memory = None 86 operating_system = None 87 cpu = None 88 chipset = None 89 offer_description = None 90 offer_shop_code = None 91 battery = None 92 color = None 93 94 for specification in specifications: 95 if 'Главна камера' in specification.find('th').get_text(): 96 back_camera = specification.find('td').get_text().strip() 97 98 if 'Селфи камера' in specification.find('th').get_text(): 99 front_camera = specification.find('td').get_text().strip() 100 101 if 'Батерија' in specification.find('th').get_text(): 102 battery = specification.find('td').get_text().strip() 103 104 if 'Меморија' in specification.find('th').get_text(): 105 rom_memory = specification.find('td').get_text().strip() 106 107 if 'Боја' in specification.find('th').get_text(): 108 color = specification.find('td').get_text().strip() 109 110 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 111 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 112 image_url, 113 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 114 115 for new_offer in new_offers: 116 flag = False 117 flag_price = False 118 offer_id = None 156 print('------------------------------------') 119 157 120 158 for old_offer in database_offers: 159 flag = False 160 for new_offer in new_offers: 161 if old_offer.offer_name == new_offer.offer_name: 162 flag = True 121 163 122 if new_offer.offer_name == old_offer.offer_name: 123 flag = True 124 if new_offer.price != old_offer.price: 125 flag_price = True 126 offer_id = old_offer.offer_id 127 128 if flag: 129 # print('ALREADY IN DATABASE') 130 # print(new_offer) 131 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 132 if flag_price: 133 print('PRICE CHANGED!') # CHANGE PRICE 134 print('offer id: ' + str(offer_id)) 135 headers = {'Content-type': 'application/json'} 136 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 137 headers=headers) 138 else: 139 print('ADDED') # ADD OFFER 140 print(new_offer) 141 headers = {'Content-type': 'application/json'} 142 requests.post('http://localhost:8080/phoneoffer/addoffer', 143 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 144 145 print('------------------------------------') 146 147 for old_offer in database_offers: 148 flag = False 149 for new_offer in new_offers: 150 if old_offer.offer_name == new_offer.offer_name: 151 flag = True 152 153 if not flag: 154 print('OFFER DELETED') 155 print(old_offer) 156 # DELETE OFFER 157 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 164 if not flag: 165 print('OFFER DELETED') 166 print(old_offer) 167 # DELETE OFFER 168 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 169 except Exception: 170 traceback.print_exc() 171 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 172 ' VALUES (%s, %s, %s);' 173 insert_value = (offer_shop, last_updated, 'failed') 174 cur.execute(insert_script, insert_value) 175 db_connection.commit() 176 cur.close() 177 db_connection.close() 178 else: 179 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 180 ' VALUES (%s, %s, %s);' 181 insert_value = (offer_shop, last_updated, 'success') 182 cur.execute(insert_script, insert_value) 183 db_connection.commit() 184 cur.close() 185 db_connection.close() -
phonelux_scrappers/scrappers/mobitech_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 14 15 sys.stdout = open(file_path, "w") 15 16 16 17 mobitech_url = "https://mobitech.mk/shop/"18 19 response1 = requests.get(mobitech_url)20 21 soup1 = BeautifulSoup(response1.content, 'html.parser')22 23 phones = soup1.find_all('div', {'class': 'jet-woo-products__inner-box'})24 25 17 offer_shop = "Mobitech" # offer shop 26 18 last_updated = datetime.now().date() 27 19 is_validated = False 28 20 29 # Mobitech phone offers that are already in database 21 # Call to read the configuration file and connect to database 22 cinfo = config_read.get_databaseconfig("../postgresdb.config") 23 db_connection = psycopg2.connect( 24 database=cinfo[0], 25 host=cinfo[1], 26 user=cinfo[2], 27 password=cinfo[3] 28 ) 29 cur = db_connection.cursor() 30 30 31 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text)) 31 try: 32 mobitech_url = "https://mobitech.mk/shop/" 32 33 33 database_offers = [] 34 response1 = requests.get(mobitech_url) 34 35 35 for offer in offers: 36 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 37 offer['ram_memory'], 38 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 39 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 40 offer['image_url'], 41 offer['offer_url'], offer['last_updated'], offer['is_validated'], 42 offer['offer_description'], 43 offer['offer_shop_code']) 44 database_offers.append(phoneOffer) 36 soup1 = BeautifulSoup(response1.content, 'html.parser') 45 37 46 new_offers = [] 38 phones = soup1.find_all('div', {'class': 'jet-woo-products__inner-box'}) 47 39 48 for phone in phones: 49 offer_url = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get('href') # url 50 image_url = phone.find('div', {'class': 'jet-woo-product-thumbnail'}).find('img').get('src') # image 51 brand = phone.find_next('div', {'class': 'jet-woo-product-categories'}).find('a').get_text().strip() # brand 52 offer_name = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get_text().strip() # offer_name 53 if brand not in offer_name: 54 offer_name = brand+" "+offer_name 55 temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi') 56 price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price 40 # Mobitech phone offers that are already in database 41 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text)) 57 42 58 response2 = requests.get(offer_url) 59 soup2 = BeautifulSoup(response2.content, 'html.parser') 43 database_offers = [] 60 44 61 specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'}) 45 for offer in offers: 46 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 47 offer['ram_memory'], 48 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 49 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 50 offer['image_url'], 51 offer['offer_url'], offer['last_updated'], offer['is_validated'], 52 offer['offer_description'], 53 offer['offer_shop_code']) 54 database_offers.append(phoneOffer) 62 55 63 ram_memory = None 64 rom_memory = None 65 battery = None 66 back_camera = None 67 front_camera = None 68 operating_system = None 69 chipset = None 70 color = None 71 offer_shop_code = None 72 cpu = None 73 offer_description = None 56 new_offers = [] 74 57 75 for specification in specifications: 76 # rom memory 77 if specification.get_text().startswith("Меморија:"): 78 rom_memory = specification.get_text().split("Меморија:")[1].strip() 79 if rom_memory == "Нема" or rom_memory == "/": 80 rom_memory = None 58 for phone in phones: 59 offer_url = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get('href') # url 60 image_url = phone.find('div', {'class': 'jet-woo-product-thumbnail'}).find('img').get('src') # image 61 brand = phone.find_next('div', {'class': 'jet-woo-product-categories'}).find('a').get_text().strip() # brand 62 offer_name = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get_text().strip() # offer_name 63 if brand not in offer_name: 64 offer_name = brand+" "+offer_name 65 temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi') 66 price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price 81 67 82 # ram memory 83 if specification.get_text().startswith("РАМ Меморија:"): 84 ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\ 85 .replace('Ram', '').strip() 86 if ram_memory == "Нема" or ram_memory == "/": 87 ram_memory = None 68 response2 = requests.get(offer_url) 69 soup2 = BeautifulSoup(response2.content, 'html.parser') 88 70 89 # camera 90 if specification.get_text().startswith("Камера:"): 91 back_camera = specification.get_text().split("Камера:")[1].strip() 92 if back_camera == "Нема": 93 back_camera = None 71 specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'}) 94 72 95 # operating system 96 if specification.get_text().startswith("Оперативен систем:"): 97 operating_system = specification.get_text().split("Оперативен систем:")[1].split(",")[0].strip() 98 if operating_system == "Нема": 99 operating_system = None 73 ram_memory = None 74 rom_memory = None 75 battery = None 76 back_camera = None 77 front_camera = None 78 operating_system = None 79 chipset = None 80 color = None 81 offer_shop_code = None 82 cpu = None 83 offer_description = None 100 84 101 # battery 102 if specification.get_text().startswith("Батерија:"): 103 battery = specification.get_text().split("Батерија:")[1].strip() 104 if battery == "Нема": 105 battery = None 85 for specification in specifications: 86 # rom memory 87 if specification.get_text().startswith("Меморија:"): 88 rom_memory = specification.get_text().split("Меморија:")[1].strip() 89 if rom_memory == "Нема" or rom_memory == "/": 90 rom_memory = None 106 91 107 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 108 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 109 image_url, 110 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 92 # ram memory 93 if specification.get_text().startswith("РАМ Меморија:"): 94 ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\ 95 .replace('Ram', '').strip() 96 if ram_memory == "Нема" or ram_memory == "/": 97 ram_memory = None 111 98 112 for new_offer in new_offers: 113 flag = False 114 flag_price = False 115 offer_id = None 99 # camera 100 if specification.get_text().startswith("Камера:"): 101 back_camera = specification.get_text().split("Камера:")[1].strip() 102 if back_camera == "Нема": 103 back_camera = None 104 105 # operating system 106 if specification.get_text().startswith("Оперативен систем:"): 107 operating_system = specification.get_text().split("Оперативен систем:")[1].split(",")[0].strip() 108 if operating_system == "Нема": 109 operating_system = None 110 111 # battery 112 if specification.get_text().startswith("Батерија:"): 113 battery = specification.get_text().split("Батерија:")[1].strip() 114 if battery == "Нема": 115 battery = None 116 117 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 118 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 119 image_url, 120 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 121 122 for new_offer in new_offers: 123 flag = False 124 flag_price = False 125 offer_id = None 126 127 for old_offer in database_offers: 128 129 if new_offer.offer_name == old_offer.offer_name: 130 flag = True 131 if new_offer.price != old_offer.price: 132 flag_price = True 133 offer_id = old_offer.offer_id 134 135 if flag: 136 print('ALREADY IN DATABASE') 137 print(new_offer) 138 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 139 if flag_price: 140 print('PRICE CHANGED!') # CHANGE PRICE 141 print('offer id: ' + str(offer_id)) 142 headers = {'Content-type': 'application/json'} 143 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 144 headers=headers) 145 else: 146 print('ADDED') # ADD OFFER 147 print(new_offer) 148 headers = {'Content-type': 'application/json'} 149 requests.post('http://localhost:8080/phoneoffer/addoffer', 150 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 151 152 print('------------------------------------') 116 153 117 154 for old_offer in database_offers: 155 flag = False 156 for new_offer in new_offers: 157 if old_offer.offer_name == new_offer.offer_name: 158 flag = True 118 159 119 if new_offer.offer_name == old_offer.offer_name: 120 flag = True 121 if new_offer.price != old_offer.price: 122 flag_price = True 123 offer_id = old_offer.offer_id 160 if not flag: 161 print('OFFER DELETED') 162 print(old_offer) 163 # DELETE OFFER 164 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 165 except Exception: 166 traceback.print_exc() 167 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 168 ' VALUES (%s, %s, %s);' 169 insert_value = (offer_shop, last_updated, 'failed') 170 cur.execute(insert_script, insert_value) 171 db_connection.commit() 172 cur.close() 173 db_connection.close() 174 else: 175 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 176 ' VALUES (%s, %s, %s);' 177 insert_value = (offer_shop, last_updated, 'success') 178 cur.execute(insert_script, insert_value) 179 db_connection.commit() 180 cur.close() 181 db_connection.close() 124 182 125 if flag:126 print('ALREADY IN DATABASE')127 print(new_offer)128 # if it's already in database, check PRICE and if it's changed, change it !!!!!!129 if flag_price:130 print('PRICE CHANGED!') # CHANGE PRICE131 print('offer id: ' + str(offer_id))132 headers = {'Content-type': 'application/json'}133 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),134 headers=headers)135 else:136 print('ADDED') # ADD OFFER137 print(new_offer)138 headers = {'Content-type': 'application/json'}139 requests.post('http://localhost:8080/phoneoffer/addoffer',140 headers=headers, data=json.dumps(new_offer.__dict__, default=str))141 142 print('------------------------------------')143 144 for old_offer in database_offers:145 flag = False146 for new_offer in new_offers:147 if old_offer.offer_name == new_offer.offer_name:148 flag = True149 150 if not flag:151 print('OFFER DELETED')152 print(old_offer)153 # DELETE OFFER154 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))155 -
phonelux_scrappers/scrappers/neptun_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 19 20 is_validated = False 20 21 21 # Neptun phone offers that are already in database 22 # Call to read the configuration file and connect to database 23 cinfo = config_read.get_databaseconfig("../postgresdb.config") 24 db_connection = psycopg2.connect( 25 database=cinfo[0], 26 host=cinfo[1], 27 user=cinfo[2], 28 password=cinfo[3] 29 ) 30 cur = db_connection.cursor() 22 31 23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/neptun').text)) 32 try: 33 # Neptun phone offers that are already in database 34 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/neptun').text)) 24 35 25 database_offers = []36 database_offers = [] 26 37 27 for offer in offers:28 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],29 offer['ram_memory'],30 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],31 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],32 offer['image_url'],33 offer['offer_url'], offer['last_updated'], offer['is_validated'],34 offer['offer_description'],35 offer['offer_shop_code'])36 database_offers.append(phoneOffer)38 for offer in offers: 39 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 40 offer['ram_memory'], 41 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 42 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 43 offer['image_url'], 44 offer['offer_url'], offer['last_updated'], offer['is_validated'], 45 offer['offer_description'], 46 offer['offer_shop_code']) 47 database_offers.append(phoneOffer) 37 48 38 new_offers = []49 new_offers = [] 39 50 40 for i in range(1, 11):41 neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page=' + str(i)51 for i in range(1, 11): 52 neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page=' + str(i) 42 53 43 # selenium is used because of the dynamic content of the page44 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')45 driver1.get(neptun_url)46 neptun_html = driver1.page_source54 # selenium is used because of the dynamic content of the page 55 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver') 56 driver1.get(neptun_url) 57 neptun_html = driver1.page_source 47 58 48 # closing the driver so the safari instance can pair with another webdriver session49 driver1.close()50 51 # response1 = requests.get(neptun_url)52 soup1 = BeautifulSoup(neptun_html, 'html.parser')53 54 phones = soup1.find('div', {'id': 'mainContainer'}).find('div',55 {'class': 'col-lg-9 col-md-9 col-sm-8 col-fix-main'}) \56 .find_all('div', {'class': 'ng-scope product-list-item-grid'})57 58 for phone in phones:59 offer_url = 'https://www.neptun.mk' + phone.find('a').get('href')60 offer_name = phone.find('a').find('h2').get_text().replace('MOB.TEL.', '').strip()61 brand = offer_name.split(' ')[0].strip().capitalize()62 image_url = 'https://www.neptun.mk' + phone.find('a').find('div', {'class': 'row'}).find('img').get('src')63 price = int(64 phone.find('div', {'class': 'col-sm-12 static'}).find('div', {'class': 'product-list-item__prices pt35'})65 .find('div', {'class': 'row'}).find('div', {'class': 'newPriceModel'}) \66 .find('span', {'class': 'product-price__amount--value ng-binding'}).get_text().replace('.', ''))67 68 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')69 driver1.get(offer_url)70 offer_html = driver1.page_source71 59 # closing the driver so the safari instance can pair with another webdriver session 72 60 driver1.close() 73 61 74 soup2 = BeautifulSoup(offer_html, 'html.parser') 62 # response1 = requests.get(neptun_url) 63 soup1 = BeautifulSoup(neptun_html, 'html.parser') 75 64 76 offer_shop_code = soup2.find('div', {'ng-if': 'showProductDetails'}) \77 .find('div', {'class': 'product-details-first-row'}).find('span', {78 'ng-bind': 'model.CodeNumber'}).get_text().strip()65 phones = soup1.find('div', {'id': 'mainContainer'}).find('div', 66 {'class': 'col-lg-9 col-md-9 col-sm-8 col-fix-main'}) \ 67 .find_all('div', {'class': 'ng-scope product-list-item-grid'}) 79 68 80 specifications_table = \ 81 soup2.find('div', {'id': 'mainContainer'}).find('div', {'ng-if': 'showProductDetails'}).find_all('ul')[-1] 82 specifications = specifications_table.get_text(separator='\n').strip().split("\n") 69 for phone in phones: 70 offer_url = 'https://www.neptun.mk' + phone.find('a').get('href') 71 offer_name = phone.find('a').find('h2').get_text().replace('MOB.TEL.', '').strip() 72 brand = offer_name.split(' ')[0].strip().capitalize() 73 image_url = 'https://www.neptun.mk' + phone.find('a').find('div', {'class': 'row'}).find('img').get('src') 74 price = int( 75 phone.find('div', {'class': 'col-sm-12 static'}).find('div', {'class': 'product-list-item__prices pt35'}) 76 .find('div', {'class': 'row'}).find('div', {'class': 'newPriceModel'}) \ 77 .find('span', {'class': 'product-price__amount--value ng-binding'}).get_text().replace('.', '')) 83 78 84 offer_description = specifications_table.get_text(separator='\n').strip() 79 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver') 80 driver1.get(offer_url) 81 offer_html = driver1.page_source 82 # closing the driver so the safari instance can pair with another webdriver session 83 driver1.close() 85 84 86 back_camera = None 87 operating_system = None 88 chipset = None 89 battery = None 90 ram_memory = None 91 rom_memory = None 92 cpu = None 93 front_camera = None 94 color = None 85 soup2 = BeautifulSoup(offer_html, 'html.parser') 95 86 96 for specification in specifications:97 if 'Батерија:' in specification:98 battery = specification.split('Батерија:')[1]87 offer_shop_code = soup2.find('div', {'ng-if': 'showProductDetails'}) \ 88 .find('div', {'class': 'product-details-first-row'}).find('span', { 89 'ng-bind': 'model.CodeNumber'}).get_text().strip() 99 90 100 if 'CPU:' in specification: 101 cpu = specification.split('CPU:')[1] 91 specifications_table = \ 92 soup2.find('div', {'id': 'mainContainer'}).find('div', {'ng-if': 'showProductDetails'}).find_all('ul')[-1] 93 specifications = specifications_table.get_text(separator='\n').strip().split("\n") 102 94 103 if 'Chipset:' in specification: 104 chipset = specification.split('Chipset:')[1] 95 offer_description = specifications_table.get_text(separator='\n').strip() 105 96 106 if 'RAM Меморија:' in specification: 107 ram_memory = specification.split('RAM Меморија:')[1] 108 continue 97 back_camera = None 98 operating_system = None 99 chipset = None 100 battery = None 101 ram_memory = None 102 rom_memory = None 103 cpu = None 104 front_camera = None 105 color = None 109 106 110 if 'ROM Меморија:' in specification:111 rom_memory = specification.split('ROM Меморија:')[1]112 continue107 for specification in specifications: 108 if 'Батерија:' in specification: 109 battery = specification.split('Батерија:')[1] 113 110 114 if 'ROM:' in specification:115 rom_memory = specification.split('ROM:')[1]111 if 'CPU:' in specification: 112 cpu = specification.split('CPU:')[1] 116 113 117 if 'RAM:' in specification:118 ram_memory = specification.split('RAM:')[1]114 if 'Chipset:' in specification: 115 chipset = specification.split('Chipset:')[1] 119 116 120 if 'iOS' in specification or 'Android' in specification: 121 operating_system = specification 117 if 'RAM Меморија:' in specification: 118 ram_memory = specification.split('RAM Меморија:')[1] 119 continue 122 120 123 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 124 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 125 image_url, 126 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 121 if 'ROM Меморија:' in specification: 122 rom_memory = specification.split('ROM Меморија:')[1] 123 continue 127 124 128 for new_offer in new_offers: 129 flag = False 130 flag_price = False 131 offer_id = None 125 if 'ROM:' in specification: 126 rom_memory = specification.split('ROM:')[1] 127 128 if 'RAM:' in specification: 129 ram_memory = specification.split('RAM:')[1] 130 131 if 'iOS' in specification or 'Android' in specification: 132 operating_system = specification 133 134 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 135 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 136 image_url, 137 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 138 139 for new_offer in new_offers: 140 flag = False 141 flag_price = False 142 offer_id = None 143 144 for old_offer in database_offers: 145 146 if new_offer.offer_shop_code == old_offer.offer_shop_code: 147 flag = True 148 if new_offer.price != old_offer.price: 149 flag_price = True 150 offer_id = old_offer.offer_id 151 152 if flag: 153 # print('ALREADY IN DATABASE') 154 # print(new_offer) 155 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 156 if flag_price: 157 print('PRICE CHANGED!') # CHANGE PRICE 158 print('offer id: ' + str(offer_id)) 159 headers = {'Content-type': 'application/json'} 160 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 161 headers=headers) 162 else: 163 print('ADDED') # ADD OFFER 164 print(new_offer) 165 headers = {'Content-type': 'application/json'} 166 requests.post('http://localhost:8080/phoneoffer/addoffer', 167 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 168 169 print('------------------------------------') 132 170 133 171 for old_offer in database_offers: 172 flag = False 173 for new_offer in new_offers: 174 if old_offer.offer_shop_code == new_offer.offer_shop_code: 175 flag = True 134 176 135 if new_offer.offer_shop_code == old_offer.offer_shop_code: 136 flag = True 137 if new_offer.price != old_offer.price: 138 flag_price = True 139 offer_id = old_offer.offer_id 140 141 if flag: 142 # print('ALREADY IN DATABASE') 143 # print(new_offer) 144 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 145 if flag_price: 146 print('PRICE CHANGED!') # CHANGE PRICE 147 print('offer id: ' + str(offer_id)) 148 headers = {'Content-type': 'application/json'} 149 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 150 headers=headers) 151 else: 152 print('ADDED') # ADD OFFER 153 print(new_offer) 154 headers = {'Content-type': 'application/json'} 155 requests.post('http://localhost:8080/phoneoffer/addoffer', 156 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 157 158 print('------------------------------------') 159 160 for old_offer in database_offers: 161 flag = False 162 for new_offer in new_offers: 163 if old_offer.offer_shop_code == new_offer.offer_shop_code: 164 flag = True 165 166 if not flag: 167 print('OFFER DELETED') 168 print(old_offer) 169 # DELETE OFFER 170 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 177 if not flag: 178 print('OFFER DELETED') 179 print(old_offer) 180 # DELETE OFFER 181 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 182 except Exception: 183 traceback.print_exc() 184 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 185 ' VALUES (%s, %s, %s);' 186 insert_value = (offer_shop, last_updated, 'failed') 187 cur.execute(insert_script, insert_value) 188 db_connection.commit() 189 cur.close() 190 db_connection.close() 191 else: 192 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 193 ' VALUES (%s, %s, %s);' 194 insert_value = (offer_shop, last_updated, 'success') 195 cur.execute(insert_script, insert_value) 196 db_connection.commit() 197 cur.close() 198 db_connection.close() -
phonelux_scrappers/scrappers/outputfile.txt
rffd50db r47f4eaf 1 ADDED2 {'offer_shop': 'Mobile Zone', 'offer_name': 'Apple iPhone 14 Pro', 'price': 95499, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Златна, Розева, Сива, Црна', 'front_camera': '12MP', 'back_camera': '48 Mp + 12 Mp + 12 Mp', 'chipset': None, 'battery': '3200mAh', 'operating_system': None, 'cpu': None, 'image_url': 'https://i0.wp.com/mobilezone.mk/wp-content/uploads/2022/09/14-pro-silver.png?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/iphone-14-pro/', 'last_updated': datetime.date(2022, 10, 1), 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}3 ------------------------------------4 OFFER DELETED5 {'offer_id': 1179, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung s20 FE', 'price': 24699, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Сина', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i2.wp.com/mobilezone.mk/wp-content/uploads/2022/03/Samsung-Galaxy-S20-FE-blue.png?resize=512%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-s20-fe/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}6 OFFER DELETED7 {'offer_id': 1181, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung Z Flip3 5G', 'price': 39999, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Црна', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i2.wp.com/mobilezone.mk/wp-content/uploads/2022/03/11.png?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-z-flip3-5g/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}8 OFFER DELETED9 {'offer_id': 1180, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung S21 FE 5G', 'price': 30899, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Зелена, Црна', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i1.wp.com/mobilezone.mk/wp-content/uploads/2022/03/5g.jpg?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-s21-fe-5g/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None} -
phonelux_scrappers/scrappers/setec_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 17 18 is_validated = False 18 19 19 # Setec phone offers that are already in database 20 # Call to read the configuration file and connect to database 21 cinfo = config_read.get_databaseconfig("../postgresdb.config") 22 db_connection = psycopg2.connect( 23 database=cinfo[0], 24 host=cinfo[1], 25 user=cinfo[2], 26 password=cinfo[3] 27 ) 28 cur = db_connection.cursor() 20 29 21 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/setec').text)) 30 try: 31 # Setec phone offers that are already in database 32 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/setec').text)) 22 33 23 database_offers = []34 database_offers = [] 24 35 25 for offer in offers:26 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],27 offer['ram_memory'],28 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],29 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],30 offer['image_url'],31 offer['offer_url'], offer['last_updated'], offer['is_validated'],32 offer['offer_description'],33 offer['offer_shop_code'])34 database_offers.append(phoneOffer)36 for offer in offers: 37 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 38 offer['ram_memory'], 39 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 40 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 41 offer['image_url'], 42 offer['offer_url'], offer['last_updated'], offer['is_validated'], 43 offer['offer_description'], 44 offer['offer_shop_code']) 45 database_offers.append(phoneOffer) 35 46 36 new_offers = []47 new_offers = [] 37 48 38 for i in range(1, 9):39 setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' + str(i)49 for i in range(1, 9): 50 setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' + str(i) 40 51 41 response1 = requests.get(setec_url)42 soup1 = BeautifulSoup(response1.content, 'html.parser')52 response1 = requests.get(setec_url) 53 soup1 = BeautifulSoup(response1.content, 'html.parser') 43 54 44 phones = soup1.find('div', {'id': 'mfilter-content-container'}) \45 .find_all('div', {'class': 'col-sm-4 col-xs-6'})55 phones = soup1.find('div', {'id': 'mfilter-content-container'}) \ 56 .find_all('div', {'class': 'col-sm-4 col-xs-6'}) 46 57 47 for phone in phones:48 offer_url = phone.find('div', {'class': 'left'}).find('a').get('href')49 image_url = phone.find('div', {'class': 'left'}).find('a').find('img').get('src')50 offer_name = phone.find('div', {'class': 'right'}).find('div', {'class': 'name'}).find('a').get_text().strip()51 brand = offer_name.split(' ')[0]58 for phone in phones: 59 offer_url = phone.find('div', {'class': 'left'}).find('a').get('href') 60 image_url = phone.find('div', {'class': 'left'}).find('a').find('img').get('src') 61 offer_name = phone.find('div', {'class': 'right'}).find('div', {'class': 'name'}).find('a').get_text().strip() 62 brand = offer_name.split(' ')[0] 52 63 53 back_camera = None54 operating_system = None55 chipset = None56 battery = None57 ram_memory = None58 rom_memory = None59 cpu = None60 front_camera = None61 color = None64 back_camera = None 65 operating_system = None 66 chipset = None 67 battery = None 68 ram_memory = None 69 rom_memory = None 70 cpu = None 71 front_camera = None 72 color = None 62 73 63 if 'Cable' in offer_name or 'AirTag' in offer_name:64 continue74 if 'Cable' in offer_name or 'AirTag' in offer_name: 75 continue 65 76 66 if brand not in offer_name:67 offer_name = brand + " " + offer_name77 if brand not in offer_name: 78 offer_name = brand + " " + offer_name 68 79 69 offer_shop_code = phone.find('div', {'class': 'right'}) \70 .find('div', {'class': 'shifra'}).get_text().replace('Шифра:', '').strip()80 offer_shop_code = phone.find('div', {'class': 'right'}) \ 81 .find('div', {'class': 'shifra'}).get_text().replace('Шифра:', '').strip() 71 82 72 price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \73 find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'})83 price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \ 84 find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'}) 74 85 75 if price_tag is None:76 price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \77 find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'cena_za_kesh'})86 if price_tag is None: 87 price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \ 88 find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'cena_za_kesh'}) 78 89 79 price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip())90 price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip()) 80 91 81 response2 = requests.get(offer_url)82 soup2 = BeautifulSoup(response2.content, 'html.parser')92 response2 = requests.get(offer_url) 93 soup2 = BeautifulSoup(response2.content, 'html.parser') 83 94 84 offer_description = soup2.find('div', {'id': 'tab-description'}).get_text(separator='\n')95 offer_description = soup2.find('div', {'id': 'tab-description'}).get_text(separator='\n') 85 96 86 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,87 color, front_camera, back_camera, chipset, battery, operating_system, cpu,88 image_url,89 offer_url, last_updated, is_validated, offer_description, offer_shop_code))97 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 98 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 99 image_url, 100 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 90 101 91 for new_offer in new_offers: 92 flag = False 93 flag_price = False 94 offer_id = None 102 for new_offer in new_offers: 103 flag = False 104 flag_price = False 105 offer_id = None 106 107 for old_offer in database_offers: 108 109 if new_offer.offer_shop_code == old_offer.offer_shop_code: 110 flag = True 111 if new_offer.price != old_offer.price: 112 flag_price = True 113 offer_id = old_offer.offer_id 114 115 if flag: 116 # print('ALREADY IN DATABASE') 117 # print(new_offer) 118 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 119 if flag_price: 120 print('PRICE CHANGED!') # CHANGE PRICE 121 print('offer id: ' + str(offer_id)) 122 headers = {'Content-type': 'application/json'} 123 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 124 headers=headers) 125 else: 126 print('ADDED') # ADD OFFER 127 print(new_offer) 128 headers = {'Content-type': 'application/json'} 129 requests.post('http://localhost:8080/phoneoffer/addoffer', 130 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 131 132 print('------------------------------------') 95 133 96 134 for old_offer in database_offers: 135 flag = False 136 for new_offer in new_offers: 137 if old_offer.offer_shop_code == new_offer.offer_shop_code: 138 flag = True 97 139 98 if new_offer.offer_shop_code == old_offer.offer_shop_code: 99 flag = True 100 if new_offer.price != old_offer.price: 101 flag_price = True 102 offer_id = old_offer.offer_id 103 104 if flag: 105 # print('ALREADY IN DATABASE') 106 # print(new_offer) 107 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 108 if flag_price: 109 print('PRICE CHANGED!') # CHANGE PRICE 110 print('offer id: ' + str(offer_id)) 111 headers = {'Content-type': 'application/json'} 112 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 113 headers=headers) 114 else: 115 print('ADDED') # ADD OFFER 116 print(new_offer) 117 headers = {'Content-type': 'application/json'} 118 requests.post('http://localhost:8080/phoneoffer/addoffer', 119 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 120 121 print('------------------------------------') 122 123 for old_offer in database_offers: 124 flag = False 125 for new_offer in new_offers: 126 if old_offer.offer_shop_code == new_offer.offer_shop_code: 127 flag = True 128 129 if not flag: 130 print('OFFER DELETED') 131 print(old_offer) 132 # DELETE OFFER 133 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 140 if not flag: 141 print('OFFER DELETED') 142 print(old_offer) 143 # DELETE OFFER 144 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 145 except Exception: 146 traceback.print_exc() 147 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 148 ' VALUES (%s, %s, %s);' 149 insert_value = (offer_shop, last_updated, 'failed') 150 cur.execute(insert_script, insert_value) 151 db_connection.commit() 152 cur.close() 153 db_connection.close() 154 else: 155 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 156 ' VALUES (%s, %s, %s);' 157 insert_value = (offer_shop, last_updated, 'success') 158 cur.execute(insert_script, insert_value) 159 db_connection.commit() 160 cur.close() 161 db_connection.close() -
phonelux_scrappers/scrappers/tehnomarket_scrapper.py
rffd50db r47f4eaf 1 1 import json 2 import traceback 2 3 import unicodedata 3 4 from datetime import datetime … … 76 77 77 78 78 # Tehnomarket phone offers that are already in database 79 # Call to read the configuration file and connect to database 80 cinfo = config_read.get_databaseconfig("../postgresdb.config") 81 db_connection = psycopg2.connect( 82 database=cinfo[0], 83 host=cinfo[1], 84 user=cinfo[2], 85 password=cinfo[3] 86 ) 87 cur = db_connection.cursor() 79 88 80 offers = json.loads( 81 unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/tehnomarket').text)) 89 try: 90 # Tehnomarket phone offers that are already in database 91 offers = json.loads( 92 unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/tehnomarket').text)) 82 93 83 database_offers = []94 database_offers = [] 84 95 85 for offer in offers:86 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],87 offer['ram_memory'],88 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],89 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],90 offer['image_url'],91 offer['offer_url'], offer['last_updated'], offer['is_validated'],92 offer['offer_description'],93 offer['offer_shop_code'])94 database_offers.append(phoneOffer)96 for offer in offers: 97 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 98 offer['ram_memory'], 99 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 100 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 101 offer['image_url'], 102 offer['offer_url'], offer['last_updated'], offer['is_validated'], 103 offer['offer_description'], 104 offer['offer_shop_code']) 105 database_offers.append(phoneOffer) 95 106 96 new_offers = []107 new_offers = [] 97 108 98 for i in range(1, 6):99 tehnomarket_url = 'https://tehnomarket.com.mk/category/4109/mobilni-telefoni#page/' + str(i)100 # print(anhoch_url)109 for i in range(1, 6): 110 tehnomarket_url = 'https://tehnomarket.com.mk/category/4109/mobilni-telefoni#page/' + str(i) 111 # print(anhoch_url) 101 112 102 # selenium is used because of the dynamic content of the page103 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')104 driver1.get(tehnomarket_url)113 # selenium is used because of the dynamic content of the page 114 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver') 115 driver1.get(tehnomarket_url) 105 116 106 scrape_function(driver1, i, new_offers)117 scrape_function(driver1, i, new_offers) 107 118 108 # closing the driver so the safari instance can pair with another webdriver session109 driver1.close()119 # closing the driver so the safari instance can pair with another webdriver session 120 driver1.close() 110 121 111 for new_offer in new_offers: 112 flag = False 113 flag_price = False 114 offer_id = None 122 for new_offer in new_offers: 123 flag = False 124 flag_price = False 125 offer_id = None 126 127 for old_offer in database_offers: 128 129 if new_offer.offer_shop_code == old_offer.offer_shop_code: 130 flag = True 131 if new_offer.price != old_offer.price: 132 flag_price = True 133 offer_id = old_offer.offer_id 134 135 if flag: 136 # print('ALREADY IN DATABASE') 137 # print(new_offer) 138 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 139 if flag_price: 140 print('PRICE CHANGED!') # CHANGE PRICE 141 print('offer id: ' + str(offer_id)) 142 headers = {'Content-type': 'application/json'} 143 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 144 headers=headers) 145 else: 146 print('ADDED') # ADD OFFER 147 print(new_offer) 148 headers = {'Content-type': 'application/json'} 149 requests.post('http://localhost:8080/phoneoffer/addoffer', 150 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 151 152 print('------------------------------------') 115 153 116 154 for old_offer in database_offers: 155 flag = False 156 for new_offer in new_offers: 157 if old_offer.offer_shop_code == new_offer.offer_shop_code: 158 flag = True 117 159 118 if new_offer.offer_shop_code == old_offer.offer_shop_code: 119 flag = True 120 if new_offer.price != old_offer.price: 121 flag_price = True 122 offer_id = old_offer.offer_id 123 124 if flag: 125 # print('ALREADY IN DATABASE') 126 # print(new_offer) 127 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 128 if flag_price: 129 print('PRICE CHANGED!') # CHANGE PRICE 130 print('offer id: ' + str(offer_id)) 131 headers = {'Content-type': 'application/json'} 132 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 133 headers=headers) 134 else: 135 print('ADDED') # ADD OFFER 136 print(new_offer) 137 headers = {'Content-type': 'application/json'} 138 requests.post('http://localhost:8080/phoneoffer/addoffer', 139 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 140 141 print('------------------------------------') 142 143 for old_offer in database_offers: 144 flag = False 145 for new_offer in new_offers: 146 if old_offer.offer_shop_code == new_offer.offer_shop_code: 147 flag = True 148 149 if not flag: 150 print('OFFER DELETED') 151 print(old_offer) 152 # DELETE OFFER 153 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 160 if not flag: 161 print('OFFER DELETED') 162 print(old_offer) 163 # DELETE OFFER 164 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 165 except Exception: 166 traceback.print_exc() 167 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 168 ' VALUES (%s, %s, %s);' 169 insert_value = ('Tehnomarket', datetime.now().date(), 'failed') 170 cur.execute(insert_script, insert_value) 171 db_connection.commit() 172 cur.close() 173 db_connection.close() 174 else: 175 insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \ 176 ' VALUES (%s, %s, %s);' 177 insert_value = ('Tehnomarket', datetime.now().date(), 'success') 178 cur.execute(insert_script, insert_value) 179 db_connection.commit() 180 cur.close() 181 db_connection.close()
Note:
See TracChangeset
for help on using the changeset viewer.