Changeset 895cd87 for phonelux_scrappers/scrappers/a1_scrapper.py
- Timestamp: 10/01/22 22:55:27 (2 years ago)
- Branches: master
- Children: fd5b100
- Parents: 48f3030
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
phonelux_scrappers/scrappers/a1_scrapper.py
r48f3030 r895cd87 1 1 import unicodedata 2 2 from datetime import datetime 3 3 import json 4 4 import psycopg2 5 5 import config_read 6 6 from bs4 import BeautifulSoup 7 7 import requests 8 import sys 9 import unicodedata 8 10 9 import sys 11 from classes.phoneoffer import PhoneOffer 10 12 11 13 file_path = 'outputfile.txt' 12 14 sys.stdout = open(file_path, "w") 13 15 14 # Call to read the configuration file and connect to database15 cinfo = config_read.get_databaseconfig("../postgresdb.config")16 db_connection = psycopg2.connect(17 database=cinfo[0],18 host=cinfo[1],19 user=cinfo[2],20 password=cinfo[3]21 )22 cur = db_connection.cursor()23 24 16 offer_shop = "A1" # offer shop 25 17 last_updated = datetime.now().date() 26 18 is_validated = False 19 20 # A1 phone offers that are already in database 21 22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text)) 23 24 database_offers = [] 25 26 for offer in offers: 27 phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'], 28 offer['ram_memory'], 29 offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'], 30 offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'], 31 offer['image_url'], 32 offer['offer_url'], offer['last_updated'], offer['is_validated'], 33 offer['offer_description'], 34 offer['offer_shop_code']) 35 database_offers.append(phoneOffer) 27 36 28 37 a1_url = 'https://www.a1.mk/webshop/mk/phones' … … 34 43 .find_all('div', {'class', 'dvc-idtfr by4'}) 35 44 45 new_offers = [] 46 36 47 for phone in phones: 37 48 brand = phone.get('data-brand').strip() 38 offer_name = brand +" "+phone.get('data-model').strip()49 offer_name = brand + " " + phone.get('data-model').strip() 39 50 40 51 # if brand not in offer_name: … … 77 88 battery = None 78 89 front_camera = None 90 chipset = None 91 offer_description = None 79 92 80 93 for row in table_rows: … … 100 113 front_camera = 
row.get_text().replace('Предна камера', '').strip() 101 114 102 insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \ 103 'ram_memory, rom_memory, battery, back_camera, front_camera, color, cpu, ' \ 104 'operating_system, offer_shop_code, last_updated, is_validated)' \ 105 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' 106 insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory, rom_memory, 107 battery, back_camera, front_camera, color, cpu, operating_system, offer_shop_code, 108 last_updated, is_validated) 109 cur.execute(insert_script, insert_value) 110 db_connection.commit() 115 new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory, 116 color, front_camera, back_camera, chipset, battery, operating_system, cpu, image_url, 117 offer_url, last_updated, is_validated, offer_description, offer_shop_code)) 111 118 112 cur.close() 113 db_connection.close() 119 for new_offer in new_offers: 120 flag = False 121 flag_price = False 122 offer_id = None 123 124 for old_offer in database_offers: 125 126 if new_offer.offer_shop_code == old_offer.offer_shop_code: 127 flag = True 128 if new_offer.price != old_offer.price: 129 flag_price = True 130 offer_id = old_offer.offer_id 131 132 if flag: 133 # print('ALREADY IN DATABASE') 134 # print(new_offer) 135 # if it's already in database, check PRICE and if it's changed, change it !!!!!! 
136 if flag_price: 137 print('PRICE CHANGED!') # CHANGE PRICE 138 print('offer id: ' + str(offer_id)) 139 headers = {'Content-type': 'application/json'} 140 requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price), 141 headers=headers) 142 else: 143 print('ADDED') # ADD OFFER 144 print(new_offer) 145 headers = {'Content-type': 'application/json'} 146 requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers, data=json.dumps(new_offer.__dict__, 147 default=str)) 148 149 print('------------------------------------') 150 151 for old_offer in database_offers: 152 flag = False 153 for new_offer in new_offers: 154 if old_offer.offer_shop_code == new_offer.offer_shop_code: 155 flag = True 156 157 if not flag: 158 print('OFFER DELETED') 159 print(old_offer) 160 # DELETE OFFER 161 requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
Note: See TracChangeset for help on using the changeset viewer.