Changeset 47f4eaf for phonelux_scrappers/scrappers/tehnomarket_scrapper.py
Legend:
- Unmodified
- Added
- Removed
-
phonelux_scrappers/scrappers/tehnomarket_scrapper.py
# tehnomarket_scrapper.py at revision r47f4eaf (previous revision: rffd50db).
import json
import traceback
import unicodedata
from datetime import datetime

# ... lines 5-78 of the file are elided by the changeset view ...

# Read the database configuration and open the connection that is used to
# record the outcome (success / failed) of this scrapper run.
cinfo = config_read.get_databaseconfig("../postgresdb.config")
db_connection = psycopg2.connect(
    database=cinfo[0],
    host=cinfo[1],
    user=cinfo[2],
    password=cinfo[3]
)
cur = db_connection.cursor()

# The cursor and connection are released in one place (the outer ``finally``)
# so they are closed even when writing the status row itself fails.
try:
    try:
        # Tehnomarket phone offers that are already in database
        offers = json.loads(
            unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/tehnomarket').text))

        database_offers = []

        for offer in offers:
            phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
                                    offer['ram_memory'],
                                    offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
                                    offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
                                    offer['image_url'],
                                    offer['offer_url'], offer['last_updated'], offer['is_validated'],
                                    offer['offer_description'],
                                    offer['offer_shop_code'])
            database_offers.append(phoneOffer)

        new_offers = []

        for i in range(1, 6):
            tehnomarket_url = 'https://tehnomarket.com.mk/category/4109/mobilni-telefoni#page/' + str(i)

            # selenium is used because of the dynamic content of the page
            driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
            try:
                driver1.get(tehnomarket_url)
                scrape_function(driver1, i, new_offers)
            finally:
                # closing the driver so the safari instance can pair with another
                # webdriver session; done in ``finally`` so a failed page load or
                # scrape does not leave the session paired
                driver1.close()

        # Index the stored offers by shop code so each scraped offer is matched
        # with a lookup instead of a scan over every stored offer.
        stored_by_code = {}
        for old_offer in database_offers:
            stored_by_code.setdefault(old_offer.offer_shop_code, []).append(old_offer)

        headers = {'Content-type': 'application/json'}

        for new_offer in new_offers:
            matches = stored_by_code.get(new_offer.offer_shop_code)

            if not matches:
                print('ADDED')  # ADD OFFER
                print(new_offer)
                requests.post('http://localhost:8080/phoneoffer/addoffer',
                              headers=headers, data=json.dumps(new_offer.__dict__, default=str))
                continue

            # Already in database: update the price only when it differs.  When
            # several stored offers share the shop code, the last one with a
            # different price is updated.
            changed = [old_offer for old_offer in matches if old_offer.price != new_offer.price]
            if changed:
                offer_id = changed[-1].offer_id
                print('PRICE CHANGED!')  # CHANGE PRICE
                print('offer id: ' + str(offer_id))
                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
                             headers=headers)

        # NOTE(review): the changeset view lost indentation, so it is unclear
        # whether this separator was printed once here or once per scraped
        # offer - confirm against the repository.
        print('------------------------------------')

        # Stored offers whose shop code was not scraped in this run are deleted.
        scraped_codes = {new_offer.offer_shop_code for new_offer in new_offers}

        for old_offer in database_offers:
            if old_offer.offer_shop_code not in scraped_codes:
                print('OFFER DELETED')
                print(old_offer)
                # DELETE OFFER
                requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
    except Exception:
        traceback.print_exc()
        status = 'failed'
    else:
        status = 'success'

    # Record the outcome of this run.
    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
                    ' VALUES (%s, %s, %s);'
    insert_value = ('Tehnomarket', datetime.now().date(), status)
    cur.execute(insert_script, insert_value)
    db_connection.commit()
finally:
    cur.close()
    db_connection.close()
Note: See TracChangeset for help on using the changeset viewer.