Changeset 895cd87 for phonelux_scrappers/scrappers/mobitech_scrapper.py
- Timestamp: 10/01/22 22:55:27 (2 years ago)
- Branches: master
- Children: fd5b100
- Parents: 48f3030
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
phonelux_scrappers/scrappers/mobitech_scrapper.py
r48f3030 r895cd87 1 import json 2 import unicodedata 1 3 from datetime import datetime 2 4 … … 5 7 from bs4 import BeautifulSoup 6 8 import requests 9 import sys 7 10 8 # import sys 9 # file_path = 'outputfile.txt' 10 # sys.stdout = open(file_path, "w") 11 from classes.phoneoffer import PhoneOffer 11 12 12 # Call to read the configuration file and connect to database 13 cinfo = config_read.get_databaseconfig("../postgresdb.config") 14 db_connection = psycopg2.connect( 15 database=cinfo[0], 16 host=cinfo[1], 17 user=cinfo[2], 18 password=cinfo[3] 19 ) 20 cur = db_connection.cursor() 13 file_path = 'outputfile.txt' 14 sys.stdout = open(file_path, "w") 15 21 16 22 17 mobitech_url = "https://mobitech.mk/shop/" … … 29 24 30 25 offer_shop = "Mobitech" # offer shop 26 last_updated = datetime.now().date() 31 27 is_validated = False 28 29 # Mobitech phone offers that are already in database 30 31 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text)) 32 33 database_offers = [] 34 35 for offer in offers: 36 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 37 offer['ram_memory'], 38 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 39 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 40 offer['image_url'], 41 offer['offer_url'], offer['last_updated'], offer['is_validated'], 42 offer['offer_description'], 43 offer['offer_shop_code']) 44 database_offers.append(phoneOffer) 45 46 new_offers = [] 32 47 33 48 for phone in phones: … … 40 55 temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi') 41 56 price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price 42 last_updated = datetime.now().date() # offer last_updated date43 57 44 58 response2 = requests.get(offer_url) … … 47 61 specifications = soup2.find_all('h2', {'class': 
'elementor-heading-title elementor-size-default'}) 48 62 49 ram_memory = "" 50 rom_memory = "" 51 battery = "" 52 back_camera = "" 53 operating_system = "" 63 ram_memory = None 64 rom_memory = None 65 battery = None 66 back_camera = None 67 front_camera = None 68 operating_system = None 69 chipset = None 70 color = None 71 offer_shop_code = None 72 cpu = None 73 offer_description = None 54 74 55 75 for specification in specifications: … … 62 82 # ram memory 63 83 if specification.get_text().startswith("РАМ Меморија:"): 64 ram_memory = specification.get_text().split("РАМ Меморија:")[1].strip() 84 ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\ 85 .replace('Ram', '').strip() 65 86 if ram_memory == "Нема" or ram_memory == "/": 66 87 ram_memory = None … … 84 105 battery = None 85 106 86 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory,' \ 87 ' rom_memory, battery, back_camera, last_updated, operating_system, is_validated)' \ 88 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 89 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory, 90 rom_memory, battery, back_camera, last_updated, operating_system, is_validated) 91 cur.execute(insert_script, insert_value) 92 db_connection.commit() 107 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 108 color, front_camera, back_camera, chipset, battery, operating_system, cpu, 109 image_url, 110 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 93 111 94 cur.close() 95 db_connection.close() 112 for new_offer in new_offers: 113 flag = False 114 flag_price = False 115 offer_id = None 116 117 for old_offer in database_offers: 118 119 if new_offer.offer_name == old_offer.offer_name: 120 flag = True 121 if new_offer.price != old_offer.price: 122 flag_price = True 123 offer_id = old_offer.offer_id 124 125 if flag: 126 print('ALREADY 
IN DATABASE') 127 print(new_offer) 128 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 129 if flag_price: 130 print('PRICE CHANGED!') # CHANGE PRICE 131 print('offer id: ' + str(offer_id)) 132 headers = {'Content-type': 'application/json'} 133 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 134 headers=headers) 135 else: 136 print('ADDED') # ADD OFFER 137 print(new_offer) 138 headers = {'Content-type': 'application/json'} 139 requests.post('http://localhost:8080/phoneoffer/addoffer', 140 headers=headers, data=json.dumps(new_offer.__dict__, default=str)) 141 142 print('------------------------------------') 143 144 for old_offer in database_offers: 145 flag = False 146 for new_offer in new_offers: 147 if old_offer.offer_name == new_offer.offer_name: 148 flag = True 149 150 if not flag: 151 print('OFFER DELETED') 152 print(old_offer) 153 # DELETE OFFER 154 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id)) 155
Note: See TracChangeset for help on using the changeset viewer.