Changeset 895cd87 for phonelux_scrappers/scrappers/tehnomarket_scrapper.py
- Timestamp: 10/01/22 22:55:27 (21 months ago)
- Branches: master
- Children: fd5b100
- Parents: 48f3030
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
phonelux_scrappers/scrappers/tehnomarket_scrapper.py
r48f3030 r895cd87 1 import json 1 2 import unicodedata 2 3 from datetime import datetime … … 6 7 from selenium import webdriver 7 8 import requests 9 import sys 8 10 9 import sys 11 from classes.phoneoffer import PhoneOffer 10 12 11 13 file_path = 'outputfile.txt' 12 14 sys.stdout = open(file_path, "w") 13 15 14 # Call to read the configuration file and connect to database15 cinfo = config_read.get_databaseconfig("../postgresdb.config")16 db_connection = psycopg2.connect(17 database=cinfo[0],18 host=cinfo[1],19 user=cinfo[2],20 password=cinfo[3]21 )22 cur = db_connection.cursor()23 16 24 25 def scrape_function(driver1, i): 17 def scrape_function(driver1, i, new_offers): 26 18 offer_shop = "Tehnomarket" # offer shop 27 19 last_updated = datetime.now().date() … … 59 51 offer_shop_code = details[4].strip() 60 52 53 back_camera = None 54 operating_system = None 55 chipset = None 56 battery = None 57 ram_memory = None 58 rom_memory = None 59 cpu = None 60 front_camera = None 61 color = None 62 61 63 specifications = [] 62 64 for info in soup2.find_all('span', {'class': 'info'}): 63 65 specifications.append(info.get_text()) 64 66 65 print(brand)66 print(offer_name)67 print()68 print()69 70 67 offer_description = '\n'.join(specifications) 71 68 72 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \ 73 'offer_description, offer_shop_code, last_updated, is_validated)' \ 74 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 75 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, offer_description, 76 offer_shop_code, last_updated, is_validated) 77 cur.execute(insert_script, insert_value) 78 db_connection.commit() 69 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 70 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 71 image_url, 72 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 79 73 else: 80 74 
driver1.implicitly_wait(30) 81 scrape_function(driver1, i )75 scrape_function(driver1, i, new_offers) 82 76 77 78 # Tehnomarket phone offers that are already in database 79 80 offers = json.loads( 81 unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/tehnomarket').text)) 82 83 database_offers = [] 84 85 for offer in offers: 86 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 87 offer['ram_memory'], 88 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 89 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 90 offer['image_url'], 91 offer['offer_url'], offer['last_updated'], offer['is_validated'], 92 offer['offer_description'], 93 offer['offer_shop_code']) 94 database_offers.append(phoneOffer) 95 96 new_offers = [] 83 97 84 98 for i in range(1, 6): … … 90 104 driver1.get(tehnomarket_url) 91 105 92 scrape_function(driver1, i) 106 scrape_function(driver1, i, new_offers) 107 93 108 # closing the driver so the safari instance can pair with another webdriver session 94 109 driver1.close() 95 110 96 cur.close() 97 db_connection.close() 111 for new_offer in new_offers: 112 flag = False 113 flag_price = False 114 offer_id = None 115 116 for old_offer in database_offers: 117 118 if new_offer.offer_shop_code == old_offer.offer_shop_code: 119 flag = True 120 if new_offer.price != old_offer.price: 121 flag_price = True 122 offer_id = old_offer.offer_id 123 124 if flag: 125 # print('ALREADY IN DATABASE') 126 # print(new_offer) 127 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 
128 if flag_price: 129 print('PRICE CHANGED!') # CHANGE PRICE 130 print('offer id: ' + str(offer_id)) 131 headers = {'Content-type': 'application/json'} 132 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 133 headers=headers) 134 else: 135 print('ADDED') # ADD OFFER 136 print(new_offer) 137 headers = {'Content-type': 'application/json'} 138 requests.post('http://localhost:8080/phoneoffer/addoffer', 139 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 140 141 print('------------------------------------') 142 143 for old_offer in database_offers: 144 flag = False 145 for new_offer in new_offers: 146 if old_offer.offer_shop_code == new_offer.offer_shop_code: 147 flag = True 148 149 if not flag: 150 print('OFFER DELETED') 151 print(old_offer) 152 # DELETE OFFER 153 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
Note: See TracChangeset for help on using the changeset viewer.