Changeset 895cd87 for phonelux_scrappers/scrappers
- Timestamp:
- 10/01/22 22:55:27 (2 years ago)
- Branches:
- master
- Children:
- fd5b100
- Parents:
- 48f3030
- Location:
- phonelux_scrappers/scrappers
- Files:
-
- 1 deleted
- 12 edited
Legend:
- Unmodified
- Added
- Removed
-
phonelux_scrappers/scrappers/a1_scrapper.py
r48f3030 r895cd87 1 1 import unicodedata 2 2 from datetime import datetime 3 3 import json 4 4 import psycopg2 5 5 import config_read 6 6 from bs4 import BeautifulSoup 7 7 import requests 8 import sys 9 import unicodedata 8 10 9 import sys 11 from classes.phoneoffer import PhoneOffer 10 12 11 13 file_path = 'outputfile.txt' 12 14 sys.stdout = open(file_path, "w") 13 15 14 # Call to read the configuration file and connect to database15 cinfo = config_read.get_databaseconfig("../postgresdb.config")16 db_connection = psycopg2.connect(17 database=cinfo[0],18 host=cinfo[1],19 user=cinfo[2],20 password=cinfo[3]21 )22 cur = db_connection.cursor()23 24 16 offer_shop = "A1" # offer shop 25 17 last_updated = datetime.now().date() 26 18 is_validated = False 19 20 # A1 phone offers that are already in database 21 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text)) 23 24 database_offers = [] 25 26 for offer in offers: 27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 28 offer['ram_memory'], 29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 31 offer['image_url'], 32 offer['offer_url'], offer['last_updated'], offer['is_validated'], 33 offer['offer_description'], 34 offer['offer_shop_code']) 35 database_offers.append(phoneOffer) 27 36 28 37 a1_url = 'https://www.a1.mk/webshop/mk/phones' … … 34 43 .find_all('div', {'class', 'dvc-idtfr by4'}) 35 44 45 new_offers = [] 46 36 47 for phone in phones: 37 48 brand = phone.get('data-brand').strip() 38 offer_name = brand +" "+phone.get('data-model').strip()49 offer_name = brand + " " + phone.get('data-model').strip() 39 50 40 51 # if brand not in offer_name: … … 77 88 battery = None 78 89 front_camera = None 90 chipset = None 91 offer_description = None 79 92 80 93 for row in table_rows: … … 100 113 front_camera = row.get_text().replace('Предна камера', '').strip() 101 114 102 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \ 103 'ram_memory, rom_memory, battery, back_camera, front_camera, color, cpu, ' \ 104 'operating_system, offer_shop_code, last_updated, is_validated)' \ 105 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 106 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory, rom_memory, 107 battery, back_camera, front_camera, color, cpu, operating_system, offer_shop_code, 108 last_updated, is_validated) 109 cur.execute(insert_script, insert_value) 110 db_connection.commit() 115 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 116 color, front_camera, back_camera, chipset, battery, operating_system, cpu, image_url, 117 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 111 118 112 cur.close() 113 db_connection.close() 119 for new_offer in new_offers: 120 flag = False 121 flag_price = False 122 offer_id = None 123 124 for old_offer in database_offers: 125 126 if new_offer.offer_shop_code == old_offer.offer_shop_code: 127 flag = True 128 if new_offer.price != old_offer.price: 129 flag_price = True 130 offer_id = old_offer.offer_id 131 132 if flag: 133 # print('ALREADY IN DATABASE') 134 # print(new_offer) 135 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 136 if flag_price: 137 print('PRICE CHANGED!') # CHANGE PRICE 138 print('offer id: ' + str(offer_id)) 139 headers = {'Content-type': 'application/json'} 140 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 141 headers=headers) 142 else: 143 print('ADDED') # ADD OFFER 144 print(new_offer) 145 headers = {'Content-type': 'application/json'} 146 requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers, data=json.dumps(new_offer.__dict__, 147 default=str)) 148 149 print('------------------------------------') 150 151 for old_offer in database_offers: 152 flag = False 153 for new_offer in new_offers: 154 if old_offer.offer_shop_code == new_offer.offer_shop_code: 155 flag = True 156 157 if not flag: 158 print('OFFER DELETED') 159 print(old_offer) 160 # DELETE OFFER 161 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) -
phonelux_scrappers/scrappers/akcija_scrapper.py
r48f3030 r895cd87 1 import json 1 2 from datetime import datetime 2 3 … … 7 8 import unicodedata 8 9 import sys 10 from classes.phoneoffer import PhoneOffer 9 11 10 # file_path = '../outputfile.txt'11 #sys.stdout = open(file_path, "w")12 file_path = 'outputfile.txt' 13 sys.stdout = open(file_path, "w") 12 14 13 # Call to read the configuration file and connect to database14 cinfo = config_read.get_databaseconfig("../postgresdb.config")15 db_connection = psycopg2.connect(16 database=cinfo[0],17 host=cinfo[1],18 user=cinfo[2],19 password=cinfo[3]20 )21 cur = db_connection.cursor()22 15 23 16 offer_shop = "Akcija" # offer shop 24 17 last_updated = datetime.now().date() 25 18 is_validated = False 19 20 # Akcija phone offers that are already in database 21 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text)) 23 24 database_offers = [] 25 26 for offer in offers: 27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 28 offer['ram_memory'], 29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 31 offer['image_url'], 32 offer['offer_url'], offer['last_updated'], offer['is_validated'], 33 offer['offer_description'], 34 offer['offer_shop_code']) 35 database_offers.append(phoneOffer) 36 37 new_offers = [] 26 38 27 39 i = 0 … … 51 63 soup2 = BeautifulSoup(response2.text, 'html.parser') 52 64 65 back_camera = None 66 operating_system = None 67 chipset = None 68 battery = None 69 ram_memory = None 70 rom_memory = None 71 cpu = None 72 front_camera = None 73 color = None 74 offer_shop_code = None 75 53 76 specifications = soup2.find('main', {'id': 'content'}) \ 54 77 .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \ … … 62 85 str(specification.get_text(separator='\n').strip())) + "\n" 63 86 64 insert_script = 'INSERT INTO phone_offers (offer_shop, brand,' \ 65 ' offer_name, price, image_url, offer_url, last_updated, is_validated, offer_description) ' \ 66 'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);' 67 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, 68 last_updated, is_validated, offer_description) 69 cur.execute(insert_script, insert_value) 70 db_connection.commit() 87 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 88 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 89 image_url, 90 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 71 91 i += 20 72 92 73 cur.close() 74 db_connection.close() 93 for new_offer in new_offers: 94 flag = False 95 flag_price = False 96 offer_id = None 97 98 for old_offer in database_offers: 99 100 if new_offer.offer_name == old_offer.offer_name: 101 flag = True 102 if new_offer.price != old_offer.price: 103 flag_price = True 104 offer_id = old_offer.offer_id 105 106 if flag: 107 # print('ALREADY IN DATABASE') 108 # print(new_offer) 109 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 110 if flag_price: 111 print('PRICE CHANGED!') # CHANGE PRICE 112 print('offer id: ' + str(offer_id)) 113 headers = {'Content-type': 'application/json'} 114 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 115 headers=headers) 116 else: 117 print('ADDED') # ADD OFFER 118 print(new_offer) 119 headers = {'Content-type': 'application/json'} 120 requests.post('http://localhost:8080/phoneoffer/addoffer', 121 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 122 123 print('------------------------------------') 124 125 for old_offer in database_offers: 126 flag = False 127 for new_offer in new_offers: 128 if old_offer.offer_name == new_offer.offer_name: 129 flag = True 130 131 if not flag: 132 print('OFFER DELETED') 133 print(old_offer) 134 # DELETE OFFER 135 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) -
phonelux_scrappers/scrappers/handy_scrapper.py
r48f3030 r895cd87 1 import json 1 2 import unicodedata 2 3 from datetime import datetime … … 9 10 import sys 10 11 12 from classes.phoneoffer import PhoneOffer 13 11 14 file_path = 'outputfile.txt' 12 15 sys.stdout = open(file_path, "w") 13 16 14 # Call to read the configuration file and connect to database15 cinfo = config_read.get_databaseconfig("../postgresdb.config")16 db_connection = psycopg2.connect(17 database=cinfo[0],18 host=cinfo[1],19 user=cinfo[2],20 password=cinfo[3]21 )22 cur = db_connection.cursor()23 17 24 18 offer_shop = "Handy" # offer shop 25 19 last_updated = datetime.now().date() 26 20 is_validated = False 21 22 # Handy phone offers that are already in database 23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/handy').text)) 24 25 database_offers = [] 26 27 for offer in offers: 28 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 29 offer['ram_memory'], 30 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 31 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 32 offer['image_url'], 33 offer['offer_url'], offer['last_updated'], offer['is_validated'], 34 offer['offer_description'], 35 offer['offer_shop_code']) 36 database_offers.append(phoneOffer) 37 38 new_offers = [] 27 39 28 40 handy_url = 'https://www.handy.mk/telefoni?page=6' … … 44 56 soup2 = BeautifulSoup(response2.text, 'html.parser') 45 57 58 back_camera = None 59 operating_system = None 60 chipset = None 61 battery = None 62 ram_memory = None 63 rom_memory = None 64 cpu = None 65 front_camera = None 66 offer_shop_code = None 67 color = None 68 image_url = None 69 46 70 color_section = soup2.find('section', {'data-hook': 'product-colors-title-section'}) 47 48 color = None49 71 if color_section is not None: 50 temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase35 63640754__container'})\72 temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase3548966286__container'})\ 51 73 .find_all('input', {'type': 'radio'}) 52 74 colors_list = [] … … 67 89 offer_description = '\n'.join(specifications) 68 90 69 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name , price, offer_url, ' \ 70 'offer_description, last_updated, is_validated)' \ 71 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s);' 72 insert_value = (offer_shop, brand, offer_name, price, offer_url, offer_description, 73 last_updated, is_validated) 74 cur.execute(insert_script, insert_value) 75 db_connection.commit() 91 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 92 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 93 image_url, 94 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 76 95 77 cur.close() 78 db_connection.close() 96 for new_offer in new_offers: 97 flag = False 98 flag_price = False 99 offer_id = None 100 101 for old_offer in database_offers: 102 103 if new_offer.offer_name == old_offer.offer_name: 104 flag = True 105 if new_offer.price != old_offer.price: 106 flag_price = True 107 offer_id = old_offer.offer_id 108 109 if flag: 110 # print('ALREADY IN DATABASE') 111 # print(new_offer) 112 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 113 if flag_price: 114 print('PRICE CHANGED!') # CHANGE PRICE 115 print('offer id: ' + str(offer_id)) 116 headers = {'Content-type': 'application/json'} 117 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 118 headers=headers) 119 else: 120 print('ADDED') # ADD OFFER 121 print(new_offer) 122 headers = {'Content-type': 'application/json'} 123 requests.post('http://localhost:8080/phoneoffer/addoffer', 124 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 125 126 print('------------------------------------') 127 128 for old_offer in database_offers: 129 flag = False 130 for new_offer in new_offers: 131 if old_offer.offer_name == new_offer.offer_name: 132 flag = True 133 134 if not flag: 135 print('OFFER DELETED') 136 print(old_offer) 137 # DELETE OFFER 138 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 139 140 -
phonelux_scrappers/scrappers/ledikom_scrapper.py
r48f3030 r895cd87 1 import json 1 2 import unicodedata 2 3 from datetime import datetime 3 4 4 import psycopg2 5 5 import config_read … … 10 10 import sys 11 11 12 from classes.phoneoffer import PhoneOffer 13 12 14 file_path = 'outputfile.txt' 13 15 sys.stdout = open(file_path, "w") 14 15 # Call to read the configuration file and connect to database16 cinfo = config_read.get_databaseconfig("../postgresdb.config")17 db_connection = psycopg2.connect(18 database=cinfo[0],19 host=cinfo[1],20 user=cinfo[2],21 password=cinfo[3]22 )23 cur = db_connection.cursor()24 16 25 17 offer_shop = "Ledikom" # offer shop 26 18 last_updated = datetime.now().date() 27 19 is_validated = False 20 21 # Ledikom phone offers that are already in database 22 23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/ledikom').text)) 24 25 database_offers = [] 26 27 for offer in offers: 28 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 29 offer['ram_memory'], 30 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 31 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 32 offer['image_url'], 33 offer['offer_url'], offer['last_updated'], offer['is_validated'], 34 offer['offer_description'], 35 offer['offer_shop_code']) 36 database_offers.append(phoneOffer) 37 38 new_offers = [] 28 39 29 40 ledikom_phone_urls = [ … … 65 76 offer_name = ' '.join(temp_offer_name.split()) 66 77 brand = offer_name.split(' ')[0] 67 price = int(phone.find('span', {'class': 'price'}).get_text().replace('ден.', '').replace('.', '').strip()) 78 price = int(phone.find('span', {'class': 'price'}).get_text().replace('ден.', '') 79 .replace('ден', '') 80 .replace('.', '').strip()) 68 81 69 82 driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver') … … 82 95 rom_memory = None 83 96 ram_memory = None 97 back_camera = None 98 operating_system = None 99 chipset = None 100 battery = None 101 cpu = None 102 front_camera = None 103 offer_shop_code = None 104 offer_description = None 84 105 85 106 if len(specifications) != 0: … … 114 135 color = temp 115 136 116 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \ 117 'ram_memory, rom_memory, color, last_updated, is_validated)' \ 118 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 119 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory, 120 rom_memory, color, last_updated, is_validated) 121 cur.execute(insert_script, insert_value) 122 db_connection.commit() 137 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 138 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 139 image_url, 140 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 123 141 124 cur.close() 125 db_connection.close() 142 for new_offer in new_offers: 143 flag = False 144 flag_price = False 145 offer_id = None 146 147 for old_offer in database_offers: 148 149 if new_offer.offer_name == old_offer.offer_name: 150 flag = True 151 if new_offer.price != old_offer.price: 152 flag_price = True 153 offer_id = old_offer.offer_id 154 155 if flag: 156 # print('ALREADY IN DATABASE') 157 # print(new_offer) 158 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 159 if flag_price: 160 print('PRICE CHANGED!') # CHANGE PRICE 161 print('offer id: ' + str(offer_id)) 162 headers = {'Content-type': 'application/json'} 163 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 164 headers=headers) 165 else: 166 print('ADDED') # ADD OFFER 167 print(new_offer) 168 headers = {'Content-type': 'application/json'} 169 requests.post('http://localhost:8080/phoneoffer/addoffer', 170 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 171 172 print('------------------------------------') 173 174 for old_offer in database_offers: 175 flag = False 176 for new_offer in new_offers: 177 if old_offer.offer_name == new_offer.offer_name: 178 flag = True 179 180 if not flag: 181 print('OFFER DELETED') 182 print(old_offer) 183 # DELETE OFFER 184 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) -
phonelux_scrappers/scrappers/mobelix_scrapper.py
r48f3030 r895cd87 1 import json 2 import sys 1 3 import unicodedata 2 4 from datetime import datetime … … 8 10 9 11 # import sys 10 # 11 # file_path = 'outputfile.txt' 12 # sys.stdout = open(file_path, "w") 12 from classes.phoneoffer import PhoneOffer 13 13 14 # Call to read the configuration file and connect to database 15 cinfo = config_read.get_databaseconfig("../postgresdb.config") 16 db_connection = psycopg2.connect( 17 database=cinfo[0], 18 host=cinfo[1], 19 user=cinfo[2], 20 password=cinfo[3] 21 ) 22 cur = db_connection.cursor() 14 file_path = 'outputfile.txt' 15 sys.stdout = open(file_path, "w") 23 16 24 17 offer_shop = "Mobelix" # offer shop 25 18 last_updated = datetime.now().date() 26 19 is_validated = False 20 21 # Mobelix phone offers that are already in database 22 23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobelix').text)) 24 25 database_offers = [] 26 27 for offer in offers: 28 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 29 offer['ram_memory'], 30 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 31 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 32 offer['image_url'], 33 offer['offer_url'], offer['last_updated'], offer['is_validated'], 34 offer['offer_description'], 35 offer['offer_shop_code']) 36 database_offers.append(phoneOffer) 37 38 new_offers = [] 27 39 28 40 for i in range(1, 17): … … 77 89 back_camera = '' 78 90 cpu = None 91 offer_shop_code = None 92 offer_description = None 79 93 80 94 for table in tables: … … 120 134 back_camera = None 121 135 122 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \ 123 'ram_memory, rom_memory, battery, back_camera, front_camera, color, cpu, chipset, ' \ 124 'operating_system, last_updated, is_validated)' \ 125 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 126 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory, rom_memory, 127 battery, back_camera, front_camera, color, cpu, chipset, operating_system, 128 last_updated, is_validated) 129 cur.execute(insert_script, insert_value) 130 db_connection.commit() 136 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 137 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 138 image_url, 139 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 131 140 132 cur.close() 133 db_connection.close() 141 142 for new_offer in new_offers: 143 flag = False 144 flag_price = False 145 offer_id = None 146 147 for old_offer in database_offers: 148 149 if new_offer.offer_name == old_offer.offer_name: 150 flag = True 151 if new_offer.price != old_offer.price: 152 flag_price = True 153 offer_id = old_offer.offer_id 154 155 if flag: 156 # print('ALREADY IN DATABASE') 157 # print(new_offer) 158 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 159 if flag_price: 160 print('PRICE CHANGED!') # CHANGE PRICE 161 print('offer id: ' + str(offer_id)) 162 headers = {'Content-type': 'application/json'} 163 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 164 headers=headers) 165 else: 166 print('ADDED') # ADD OFFER 167 print(new_offer) 168 headers = {'Content-type': 'application/json'} 169 requests.post('http://localhost:8080/phoneoffer/addoffer', 170 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 171 172 print('------------------------------------') 173 174 for old_offer in database_offers: 175 flag = False 176 for new_offer in new_offers: 177 if old_offer.offer_name == new_offer.offer_name: 178 flag = True 179 180 if not flag: 181 print('OFFER DELETED') 182 print(old_offer) 183 # DELETE OFFER 184 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) -
phonelux_scrappers/scrappers/mobigo_scrapper.py
r48f3030 r895cd87 1 import json 2 import unicodedata 1 3 from datetime import datetime 2 4 … … 5 7 from bs4 import BeautifulSoup 6 8 import requests 9 import sys 7 10 8 # import sys 9 # 10 # file_path = 'outputfile.txt' 11 # sys.stdout = open(file_path, "w") 11 from classes.phoneoffer import PhoneOffer 12 12 13 # Call to read the configuration file and connect to database 14 cinfo = config_read.get_databaseconfig("../postgresdb.config") 15 db_connection = psycopg2.connect( 16 database=cinfo[0], 17 host=cinfo[1], 18 user=cinfo[2], 19 password=cinfo[3] 20 ) 21 cur = db_connection.cursor() 13 file_path = 'outputfile.txt' 14 sys.stdout = open(file_path, "w") 22 15 23 16 offer_shop = "Mobi Go" # offer shop 24 17 last_updated = datetime.now().date() 25 18 is_validated = False 19 20 # Mobi Go phone offers that are already in database 21 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobigo').text)) 23 24 database_offers = [] 25 26 for offer in offers: 27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 28 offer['ram_memory'], 29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 31 offer['image_url'], 32 offer['offer_url'], offer['last_updated'], offer['is_validated'], 33 offer['offer_description'], 34 offer['offer_shop_code']) 35 database_offers.append(phoneOffer) 36 37 new_offers = [] 38 39 26 40 for i in range(1, 6): 27 41 mobigo_url = "https://mobigo.mk/page/" + str(i) + "/" … … 55 69 specifications = soup2.find('table', {'id': 'singlet'}).find_all('tr') 56 70 57 ram_memory = "" 58 rom_memory = "" 59 battery = "" 60 back_camera = "" 61 front_camera = "" 62 chipset = "" 63 operating_system = "" 71 ram_memory = None 72 rom_memory = None 73 battery = None 74 back_camera = None 75 front_camera = None 76 chipset = None 77 operating_system = None 78 cpu = None 79 offer_shop_code = None 80 offer_description = None 81 color = None 64 82 65 83 for specification in specifications: … … 111 129 battery = None 112 130 113 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory,' \ 114 ' rom_memory, battery, back_camera, front_camera, chipset, operating_system, last_updated, is_validated)' \ 115 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 116 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory, 117 rom_memory, battery, back_camera, front_camera, chipset, operating_system, last_updated, is_validated) 118 cur.execute(insert_script, insert_value) 119 db_connection.commit() 131 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 132 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 133 image_url, 134 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 120 135 121 cur.close() 122 db_connection.close() 136 137 for new_offer in new_offers: 138 flag = False 139 flag_price = False 140 offer_id = None 141 142 for old_offer in database_offers: 143 144 if new_offer.offer_name == old_offer.offer_name: 145 flag = True 146 if new_offer.price != old_offer.price: 147 flag_price = True 148 offer_id = old_offer.offer_id 149 150 if flag: 151 print('ALREADY IN DATABASE') 152 print(new_offer) 153 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 154 if flag_price: 155 print('PRICE CHANGED!') # CHANGE PRICE 156 print('offer id: ' + str(offer_id)) 157 headers = {'Content-type': 'application/json'} 158 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 159 headers=headers) 160 else: 161 print('ADDED') # ADD OFFER 162 print(new_offer) 163 headers = {'Content-type': 'application/json'} 164 requests.post('http://localhost:8080/phoneoffer/addoffer', 165 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 166 167 print('------------------------------------') 168 169 for old_offer in database_offers: 170 flag = False 171 for new_offer in new_offers: 172 if old_offer.offer_name == new_offer.offer_name: 173 flag = True 174 175 if not flag: 176 print('OFFER DELETED') 177 print(old_offer) 178 # DELETE OFFER 179 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) -
phonelux_scrappers/scrappers/mobilezone_scrapper.py
r48f3030 r895cd87 1 import json 1 2 import unicodedata 2 3 from datetime import datetime … … 6 7 from selenium import webdriver 7 8 import requests 9 import sys 8 10 9 import sys 11 from classes.phoneoffer import PhoneOffer 10 12 11 13 file_path = 'outputfile.txt' 12 14 sys.stdout = open(file_path, "w") 13 15 14 # Call to read the configuration file and connect to database15 cinfo = config_read.get_databaseconfig("../postgresdb.config")16 db_connection = psycopg2.connect(17 database=cinfo[0],18 host=cinfo[1],19 user=cinfo[2],20 password=cinfo[3]21 )22 cur = db_connection.cursor()23 24 16 offer_shop = "Mobile Zone" # offer shop 25 17 last_updated = datetime.now().date() 26 18 is_validated = False 19 20 # Mobile Zone phone offers that are already in database 21 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobilezone').text)) 23 24 database_offers = [] 25 26 for offer in offers: 27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 28 offer['ram_memory'], 29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 31 offer['image_url'], 32 offer['offer_url'], offer['last_updated'], offer['is_validated'], 33 offer['offer_description'], 34 offer['offer_shop_code']) 35 database_offers.append(phoneOffer) 36 37 new_offers = [] 27 38 28 39 for i in range(1, 3): … … 54 65 offer_name = brand + ' ' + offer_name 55 66 56 price = int(unicodedata.normalize('NFKD', phone.find('span', {'class': 'woocommerce-Price-amount amount'}) 57 .find('bdi').get_text().replace(',', '').replace('ден', '').strip())) 67 price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'}) 68 price = None 69 70 if price_tag is not None: 71 price = int(unicodedata.normalize('NFKD', price_tag.find('bdi').get_text() 72 .replace(',', '') 73 .replace('ден', '').strip())) 74 else: 75 continue 58 76 59 77 response2 = requests.get(offer_url) … … 65 83 front_camera = None 66 84 rom_memory = None 85 ram_memory = None 86 operating_system = None 87 cpu = None 88 chipset = None 89 offer_description = None 90 offer_shop_code = None 67 91 battery = None 68 92 color = None … … 84 108 color = specification.find('td').get_text().strip() 85 109 110 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 111 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 112 image_url, 113 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 86 114 115 for new_offer in new_offers: 116 flag = False 117 flag_price = False 118 offer_id = None 87 119 88 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name , price, offer_url, image_url, ' \ 89 'rom_memory, battery, color, front_camera, back_camera, last_updated, is_validated)' \ 90 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 91 insert_value = (offer_shop, brand, offer_name, price, offer_url, image_url, rom_memory, battery, color, 92 front_camera, back_camera, last_updated, is_validated) 93 cur.execute(insert_script, insert_value) 94 db_connection.commit() 120 for old_offer in database_offers: 95 121 96 cur.close() 97 db_connection.close() 122 if new_offer.offer_name == old_offer.offer_name: 123 flag = True 124 if new_offer.price != old_offer.price: 125 flag_price = True 126 offer_id = old_offer.offer_id 127 128 if flag: 129 # print('ALREADY IN DATABASE') 130 # print(new_offer) 131 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 132 if flag_price: 133 print('PRICE CHANGED!') # CHANGE PRICE 134 print('offer id: ' + str(offer_id)) 135 headers = {'Content-type': 'application/json'} 136 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 137 headers=headers) 138 else: 139 print('ADDED') # ADD OFFER 140 print(new_offer) 141 headers = {'Content-type': 'application/json'} 142 requests.post('http://localhost:8080/phoneoffer/addoffer', 143 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 144 145 print('------------------------------------') 146 147 for old_offer in database_offers: 148 flag = False 149 for new_offer in new_offers: 150 if old_offer.offer_name == new_offer.offer_name: 151 flag = True 152 153 if not flag: 154 print('OFFER DELETED') 155 print(old_offer) 156 # DELETE OFFER 157 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) -
phonelux_scrappers/scrappers/mobitech_scrapper.py
r48f3030 r895cd87 1 import json 2 import unicodedata 1 3 from datetime import datetime 2 4 … … 5 7 from bs4 import BeautifulSoup 6 8 import requests 9 import sys 7 10 8 # import sys 9 # file_path = 'outputfile.txt' 10 # sys.stdout = open(file_path, "w") 11 from classes.phoneoffer import PhoneOffer 11 12 12 # Call to read the configuration file and connect to database 13 cinfo = config_read.get_databaseconfig("../postgresdb.config") 14 db_connection = psycopg2.connect( 15 database=cinfo[0], 16 host=cinfo[1], 17 user=cinfo[2], 18 password=cinfo[3] 19 ) 20 cur = db_connection.cursor() 13 file_path = 'outputfile.txt' 14 sys.stdout = open(file_path, "w") 15 21 16 22 17 mobitech_url = "https://mobitech.mk/shop/" … … 29 24 30 25 offer_shop = "Mobitech" # offer shop 26 last_updated = datetime.now().date() 31 27 is_validated = False 28 29 # Mobitech phone offers that are already in database 30 31 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text)) 32 33 database_offers = [] 34 35 for offer in offers: 36 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 37 offer['ram_memory'], 38 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 39 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 40 offer['image_url'], 41 offer['offer_url'], offer['last_updated'], offer['is_validated'], 42 offer['offer_description'], 43 offer['offer_shop_code']) 44 database_offers.append(phoneOffer) 45 46 new_offers = [] 32 47 33 48 for phone in phones: … … 40 55 temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi') 41 56 price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price 42 last_updated = datetime.now().date() # offer last_updated date43 57 44 58 response2 = requests.get(offer_url) … … 47 61 specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'}) 48 62 49 ram_memory = "" 50 rom_memory = "" 51 battery = "" 52 back_camera = "" 53 operating_system = "" 63 ram_memory = None 64 rom_memory = None 65 battery = None 66 back_camera = None 67 front_camera = None 68 operating_system = None 69 chipset = None 70 color = None 71 offer_shop_code = None 72 cpu = None 73 offer_description = None 54 74 55 75 for specification in specifications: … … 62 82 # ram memory 63 83 if specification.get_text().startswith("РАМ Меморија:"): 64 ram_memory = specification.get_text().split("РАМ Меморија:")[1].strip() 84 ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\ 85 .replace('Ram', '').strip() 65 86 if ram_memory == "Нема" or ram_memory == "/": 66 87 ram_memory = None … … 84 105 battery = None 85 106 86 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory,' \ 87 ' rom_memory, battery, back_camera, last_updated, operating_system, is_validated)' \ 88 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 89 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory, 90 rom_memory, battery, back_camera, last_updated, operating_system, is_validated) 91 cur.execute(insert_script, insert_value) 92 db_connection.commit() 107 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 108 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 109 image_url, 110 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 93 111 94 cur.close() 95 db_connection.close() 112 for new_offer in new_offers: 113 flag = False 114 flag_price = False 115 offer_id = None 116 117 for old_offer in database_offers: 118 119 if new_offer.offer_name == old_offer.offer_name: 120 flag = True 121 if new_offer.price != old_offer.price: 122 flag_price = True 123 offer_id = old_offer.offer_id 124 125 if flag: 126 print('ALREADY IN DATABASE') 127 print(new_offer) 128 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 129 if flag_price: 130 print('PRICE CHANGED!') # CHANGE PRICE 131 print('offer id: ' + str(offer_id)) 132 headers = {'Content-type': 'application/json'} 133 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 134 headers=headers) 135 else: 136 print('ADDED') # ADD OFFER 137 print(new_offer) 138 headers = {'Content-type': 'application/json'} 139 requests.post('http://localhost:8080/phoneoffer/addoffer', 140 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 141 142 print('------------------------------------') 143 144 for old_offer in database_offers: 145 flag = False 146 for new_offer in new_offers: 147 if old_offer.offer_name == new_offer.offer_name: 148 flag = True 149 150 if not flag: 151 print('OFFER DELETED') 152 print(old_offer) 153 # DELETE OFFER 154 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 155 -
phonelux_scrappers/scrappers/neptun_scrapper.py
r48f3030 r895cd87 1 import json 1 2 import unicodedata 2 3 from datetime import datetime … … 9 10 import sys 10 11 12 from classes.phoneoffer import PhoneOffer 13 11 14 file_path = 'outputfile.txt' 12 15 sys.stdout = open(file_path, "w") 13 14 # Call to read the configuration file and connect to database15 cinfo = config_read.get_databaseconfig("../postgresdb.config")16 db_connection = psycopg2.connect(17 database=cinfo[0],18 host=cinfo[1],19 user=cinfo[2],20 password=cinfo[3]21 )22 cur = db_connection.cursor()23 16 24 17 offer_shop = "Neptun" # offer shop … … 26 19 is_validated = False 27 20 21 # Neptun phone offers that are already in database 22 23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/neptun').text)) 24 25 database_offers = [] 26 27 for offer in offers: 28 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 29 offer['ram_memory'], 30 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 31 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 32 offer['image_url'], 33 offer['offer_url'], offer['last_updated'], offer['is_validated'], 34 offer['offer_description'], 35 offer['offer_shop_code']) 36 database_offers.append(phoneOffer) 37 38 new_offers = [] 39 28 40 for i in range(1, 11): 29 neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page=' +str(i)41 neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page=' + str(i) 30 42 31 43 # selenium is used because of the dynamic content of the page … … 72 84 offer_description = specifications_table.get_text(separator='\n').strip() 73 85 86 back_camera = None 74 87 operating_system = None 75 88 chipset = None … … 78 91 rom_memory = None 79 92 cpu = None 93 front_camera = None 94 color = None 95 80 96 for specification in specifications: 81 97 if 'Батерија:' in specification: … … 105 121 operating_system = specification 106 122 107 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name , price, image_url, offer_url,' \ 108 'offer_shop_code, operating_system, battery, chipset, cpu, ram_memory, rom_memory, ' \ 109 'offer_description, last_updated, is_validated)' \ 110 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 111 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, 112 offer_shop_code, operating_system, battery, chipset, cpu, ram_memory, rom_memory, offer_description, 113 last_updated, is_validated) 114 cur.execute(insert_script, insert_value) 115 db_connection.commit() 123 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 124 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 125 image_url, 126 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 116 127 117 cur.close() 118 db_connection.close() 128 for new_offer in new_offers: 129 flag = False 130 flag_price = False 131 offer_id = None 132 133 for old_offer in database_offers: 134 135 if new_offer.offer_shop_code == old_offer.offer_shop_code: 136 flag = True 137 if new_offer.price != old_offer.price: 138 flag_price = True 139 offer_id = old_offer.offer_id 140 141 if flag: 142 # print('ALREADY IN DATABASE') 143 # print(new_offer) 144 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 145 if flag_price: 146 print('PRICE CHANGED!') # CHANGE PRICE 147 print('offer id: ' + str(offer_id)) 148 headers = {'Content-type': 'application/json'} 149 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 150 headers=headers) 151 else: 152 print('ADDED') # ADD OFFER 153 print(new_offer) 154 headers = {'Content-type': 'application/json'} 155 requests.post('http://localhost:8080/phoneoffer/addoffer', 156 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 157 158 print('------------------------------------') 159 160 for old_offer in database_offers: 161 flag = False 162 for new_offer in new_offers: 163 if old_offer.offer_shop_code == new_offer.offer_shop_code: 164 flag = True 165 166 if not flag: 167 print('OFFER DELETED') 168 print(old_offer) 169 # DELETE OFFER 170 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) -
phonelux_scrappers/scrappers/outputfile.txt
r48f3030 r895cd87 1 ADDED 2 {'offer_shop': 'Mobile Zone', 'offer_name': 'Apple iPhone 14 Pro', 'price': 95499, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Златна, Розева, Сива, Црна', 'front_camera': '12MP', 'back_camera': '48 Mp + 12 Mp + 12 Mp', 'chipset': None, 'battery': '3200mAh', 'operating_system': None, 'cpu': None, 'image_url': 'https://i0.wp.com/mobilezone.mk/wp-content/uploads/2022/09/14-pro-silver.png?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/iphone-14-pro/', 'last_updated': datetime.date(2022, 10, 1), 'is_validated': False, 'offer_description': None, 'offer_shop_code': None} 3 ------------------------------------ 4 OFFER DELETED 5 {'offer_id': 1179, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung s20 FE', 'price': 24699, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Сина', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i2.wp.com/mobilezone.mk/wp-content/uploads/2022/03/Samsung-Galaxy-S20-FE-blue.png?resize=512%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-s20-fe/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None} 6 OFFER DELETED 7 {'offer_id': 1181, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung Z Flip3 5G', 'price': 39999, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Црна', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i2.wp.com/mobilezone.mk/wp-content/uploads/2022/03/11.png?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-z-flip3-5g/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None} 8 OFFER DELETED 9 {'offer_id': 1180, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung S21 FE 5G', 'price': 30899, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Зелена, Црна', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i1.wp.com/mobilezone.mk/wp-content/uploads/2022/03/5g.jpg?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-s21-fe-5g/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None} -
phonelux_scrappers/scrappers/setec_scrapper.py
r48f3030 r895cd87 1 import json 1 2 import unicodedata 2 3 from datetime import datetime 3 4 4 import psycopg2 5 5 import config_read 6 6 from bs4 import BeautifulSoup 7 7 import requests 8 import sys 8 9 9 import sys 10 from classes.phoneoffer import PhoneOffer 10 11 11 12 file_path = 'outputfile.txt' 12 13 sys.stdout = open(file_path, "w") 13 14 # Call to read the configuration file and connect to database15 cinfo = config_read.get_databaseconfig("../postgresdb.config")16 db_connection = psycopg2.connect(17 database=cinfo[0],18 host=cinfo[1],19 user=cinfo[2],20 password=cinfo[3]21 )22 cur = db_connection.cursor()23 14 24 15 offer_shop = "Setec" # offer shop … … 26 17 is_validated = False 27 18 28 for i in range(1, 7): 29 setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page='+str(i) 19 # Setec phone offers that are already in database 20 21 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/setec').text)) 22 23 database_offers = [] 24 25 for offer in offers: 26 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 27 offer['ram_memory'], 28 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 29 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 30 offer['image_url'], 31 offer['offer_url'], offer['last_updated'], offer['is_validated'], 32 offer['offer_description'], 33 offer['offer_shop_code']) 34 database_offers.append(phoneOffer) 35 36 new_offers = [] 37 38 for i in range(1, 9): 39 setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' + str(i) 30 40 31 41 response1 = requests.get(setec_url) … … 41 51 brand = offer_name.split(' ')[0] 42 52 53 back_camera = None 54 operating_system = None 55 chipset = None 56 battery = None 57 ram_memory = None 58 rom_memory = None 59 cpu = None 60 front_camera = None 61 color = None 62 43 63 if 'Cable' in offer_name or 'AirTag' in offer_name: 44 64 continue … … 49 69 offer_shop_code = phone.find('div', {'class': 'right'}) \ 50 70 .find('div', {'class': 'shifra'}).get_text().replace('Шифра:', '').strip() 51 price = int(phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \ 52 find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'}) \ 53 .get_text().replace('Ден.', '').replace(',', '').strip()) 71 72 price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \ 73 find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'}) 74 75 if price_tag is None: 76 price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \ 77 find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'cena_za_kesh'}) 78 79 price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip()) 54 80 55 81 response2 = requests.get(offer_url) … … 58 84 offer_description = soup2.find('div', {'id': 'tab-description'}).get_text(separator='\n') 59 85 60 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name , price, image_url, offer_url,' \ 61 'offer_shop_code, offer_description, last_updated, is_validated)' \ 62 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 63 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, 64 offer_shop_code, offer_description, last_updated, is_validated) 65 cur.execute(insert_script, insert_value) 66 db_connection.commit() 86 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 87 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 88 image_url, 89 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 67 90 68 cur.close() 69 db_connection.close() 91 for new_offer in new_offers: 92 flag = False 93 flag_price = False 94 offer_id = None 95 96 for old_offer in database_offers: 97 98 if new_offer.offer_shop_code == old_offer.offer_shop_code: 99 flag = True 100 if new_offer.price != old_offer.price: 101 flag_price = True 102 offer_id = old_offer.offer_id 103 104 if flag: 105 # print('ALREADY IN DATABASE') 106 # print(new_offer) 107 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 108 if flag_price: 109 print('PRICE CHANGED!') # CHANGE PRICE 110 print('offer id: ' + str(offer_id)) 111 headers = {'Content-type': 'application/json'} 112 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 113 headers=headers) 114 else: 115 print('ADDED') # ADD OFFER 116 print(new_offer) 117 headers = {'Content-type': 'application/json'} 118 requests.post('http://localhost:8080/phoneoffer/addoffer', 119 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 120 121 print('------------------------------------') 122 123 for old_offer in database_offers: 124 flag = False 125 for new_offer in new_offers: 126 if old_offer.offer_shop_code == new_offer.offer_shop_code: 127 flag = True 128 129 if not flag: 130 print('OFFER DELETED') 131 print(old_offer) 132 # DELETE OFFER 133 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) -
phonelux_scrappers/scrappers/tehnomarket_scrapper.py
r48f3030 r895cd87 1 import json 1 2 import unicodedata 2 3 from datetime import datetime … … 6 7 from selenium import webdriver 7 8 import requests 9 import sys 8 10 9 import sys 11 from classes.phoneoffer import PhoneOffer 10 12 11 13 file_path = 'outputfile.txt' 12 14 sys.stdout = open(file_path, "w") 13 15 14 # Call to read the configuration file and connect to database15 cinfo = config_read.get_databaseconfig("../postgresdb.config")16 db_connection = psycopg2.connect(17 database=cinfo[0],18 host=cinfo[1],19 user=cinfo[2],20 password=cinfo[3]21 )22 cur = db_connection.cursor()23 16 24 25 def scrape_function(driver1, i): 17 def scrape_function(driver1, i, new_offers): 26 18 offer_shop = "Tehnomarket" # offer shop 27 19 last_updated = datetime.now().date() … … 59 51 offer_shop_code = details[4].strip() 60 52 53 back_camera = None 54 operating_system = None 55 chipset = None 56 battery = None 57 ram_memory = None 58 rom_memory = None 59 cpu = None 60 front_camera = None 61 color = None 62 61 63 specifications = [] 62 64 for info in soup2.find_all('span', {'class': 'info'}): 63 65 specifications.append(info.get_text()) 64 66 65 print(brand)66 print(offer_name)67 print()68 print()69 70 67 offer_description = '\n'.join(specifications) 71 68 72 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \ 73 'offer_description, offer_shop_code, last_updated, is_validated)' \ 74 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 75 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, offer_description, 76 offer_shop_code, last_updated, is_validated) 77 cur.execute(insert_script, insert_value) 78 db_connection.commit() 69 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 70 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 71 image_url, 72 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 79 73 else: 80 74 driver1.implicitly_wait(30) 81 scrape_function(driver1, i )75 scrape_function(driver1, i, new_offers) 82 76 77 78 # Tehnomarket phone offers that are already in database 79 80 offers = json.loads( 81 unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/tehnomarket').text)) 82 83 database_offers = [] 84 85 for offer in offers: 86 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 87 offer['ram_memory'], 88 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 89 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 90 offer['image_url'], 91 offer['offer_url'], offer['last_updated'], offer['is_validated'], 92 offer['offer_description'], 93 offer['offer_shop_code']) 94 database_offers.append(phoneOffer) 95 96 new_offers = [] 83 97 84 98 for i in range(1, 6): … … 90 104 driver1.get(tehnomarket_url) 91 105 92 scrape_function(driver1, i) 106 scrape_function(driver1, i, new_offers) 107 93 108 # closing the driver so the safari instance can pair with another webdriver session 94 109 driver1.close() 95 110 96 cur.close() 97 db_connection.close() 111 for new_offer in new_offers: 112 flag = False 113 flag_price = False 114 offer_id = None 115 116 for old_offer in database_offers: 117 118 if new_offer.offer_shop_code == old_offer.offer_shop_code: 119 flag = True 120 if new_offer.price != old_offer.price: 121 flag_price = True 122 offer_id = old_offer.offer_id 123 124 if flag: 125 # print('ALREADY IN DATABASE') 126 # print(new_offer) 127 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 128 if flag_price: 129 print('PRICE CHANGED!') # CHANGE PRICE 130 print('offer id: ' + str(offer_id)) 131 headers = {'Content-type': 'application/json'} 132 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 133 headers=headers) 134 else: 135 print('ADDED') # ADD OFFER 136 print(new_offer) 137 headers = {'Content-type': 'application/json'} 138 requests.post('http://localhost:8080/phoneoffer/addoffer', 139 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 140 141 print('------------------------------------') 142 143 for old_offer in database_offers: 144 flag = False 145 for new_offer in new_offers: 146 if old_offer.offer_shop_code == new_offer.offer_shop_code: 147 flag = True 148 149 if not flag: 150 print('OFFER DELETED') 151 print(old_offer) 152 # DELETE OFFER 153 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
Note:
See TracChangeset
for help on using the changeset viewer.