source: phonelux_scrappers/scrappers/setec_scrapper.py

Last change on this file was 47f4eaf, checked in by Marko <Marko@…>, 20 months ago

Final features implemented

  • Property mode set to 100644
File size: 6.3 KB
Line 
1import json
2import traceback
3import unicodedata
4from datetime import datetime
5import psycopg2
6import config_read
7from bs4 import BeautifulSoup
8import requests
9import sys
10
11from classes.phoneoffer import PhoneOffer
12
# Everything this scrapper print()s is redirected into this log file.
file_path = 'outputfile.txt'
# The encoding is pinned to UTF-8 because open() otherwise falls back to the
# locale's preferred encoding. The scraped pages contain Cyrillic text, and
# printing an offer on a non-UTF-8 locale would raise UnicodeEncodeError and
# abort the run.
sys.stdout = open(file_path, "w", encoding="utf-8")

offer_shop = "Setec"  # offer shop
last_updated = datetime.now().date()  # date stamped on every scraped offer and on the run record
is_validated = False  # value passed as is_validated for every newly scraped offer

# Call to read the configuration file and connect to database.
# cinfo is indexed positionally: 0 = database, 1 = host, 2 = user, 3 = password.
cinfo = config_read.get_databaseconfig("../postgresdb.config")
db_connection = psycopg2.connect(
    database=cinfo[0],
    host=cinfo[1],
    user=cinfo[2],
    password=cinfo[3]
)
cur = db_connection.cursor()
29
# Maximum number of seconds to wait on any single HTTP call. `requests` applies
# no timeout by default, so a stalled connection would otherwise hang the
# scrapper forever and no row would ever be written to scrapper_info.
REQUEST_TIMEOUT = 30


def _load_database_offers():
    """Return the Setec offers already stored by the backend as PhoneOffer objects."""
    response = requests.get('http://localhost:8080/phoneoffer/shop/setec',
                            timeout=REQUEST_TIMEOUT)
    offers = json.loads(unicodedata.normalize('NFKD', response.text))
    return [
        PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
                   offer['ram_memory'], offer['rom_memory'], offer['color'],
                   offer['front_camera'], offer['back_camera'], offer['chipset'],
                   offer['battery'], offer['operating_system'], offer['cpu'],
                   offer['image_url'], offer['offer_url'], offer['last_updated'],
                   offer['is_validated'], offer['offer_description'],
                   offer['offer_shop_code'])
        for offer in offers
    ]


def _scrape_page(page_number):
    """Scrape one Setec category listing page and return its phones as PhoneOffer objects.

    Items whose name contains 'Cable' or 'AirTag' are skipped. Each remaining
    item costs one extra request to its own page to read the description.

    A missing expected element makes BeautifulSoup return None and the next
    call raise AttributeError; that is deliberately not caught here so the
    top-level handler records the whole run as failed.
    """
    setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' \
                + str(page_number)
    listing = BeautifulSoup(requests.get(setec_url, timeout=REQUEST_TIMEOUT).content,
                            'html.parser')
    phones = listing.find('div', {'id': 'mfilter-content-container'}) \
        .find_all('div', {'class': 'col-sm-4 col-xs-6'})

    # Specification fields are not extracted by this scrapper; they are sent as None.
    ram_memory = rom_memory = color = None
    front_camera = back_camera = None
    chipset = battery = operating_system = cpu = None

    page_offers = []
    for phone in phones:
        link = phone.find('div', {'class': 'left'}).find('a')
        offer_url = link.get('href')
        image_url = link.find('img').get('src')

        details = phone.find('div', {'class': 'right'})
        offer_name = details.find('div', {'class': 'name'}).find('a').get_text().strip()

        if 'Cable' in offer_name or 'AirTag' in offer_name:
            continue

        offer_shop_code = details.find('div', {'class': 'shifra'}) \
            .get_text().replace('Шифра:', '').strip()

        regular_price = details.find('div', {'class': 'price'}) \
            .find('div', {'class': 'category-price-redovna'})
        price_tag = regular_price.find('span', {'class': 'price-old-new'})
        if price_tag is None:
            # Fall back to the other price span when 'price-old-new' is absent.
            price_tag = regular_price.find('span', {'class': 'cena_za_kesh'})
        price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip())

        offer_page = BeautifulSoup(requests.get(offer_url, timeout=REQUEST_TIMEOUT).content,
                                   'html.parser')
        offer_description = offer_page.find('div', {'id': 'tab-description'}) \
            .get_text(separator='\n')

        page_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
                                      color, front_camera, back_camera, chipset, battery,
                                      operating_system, cpu, image_url, offer_url,
                                      last_updated, is_validated, offer_description,
                                      offer_shop_code))
    return page_offers


def _sync_offers(new_offers, database_offers):
    """Bring the backend in line with the scrape: add, re-price and delete offers.

    Offers are matched on offer_shop_code. A scraped offer with no stored match
    is POSTed as new; one whose price differs from a stored match gets a price
    change PUT; a stored offer whose code was not scraped is DELETEd.
    """
    headers = {'Content-type': 'application/json'}

    # Index stored offers by shop code once instead of rescanning the whole
    # list for every scraped offer. Lists keep any duplicate codes in order.
    stored_by_code = {}
    for old_offer in database_offers:
        stored_by_code.setdefault(old_offer.offer_shop_code, []).append(old_offer)

    for new_offer in new_offers:
        matches = stored_by_code.get(new_offer.offer_shop_code, [])
        if not matches:
            print('ADDED')  # ADD OFFER
            print(new_offer)
            requests.post('http://localhost:8080/phoneoffer/addoffer',
                          headers=headers, data=json.dumps(new_offer.__dict__, default=str),
                          timeout=REQUEST_TIMEOUT)
            continue

        repriced = [old_offer for old_offer in matches if new_offer.price != old_offer.price]
        if repriced:
            # With duplicate codes the last differing stored offer is the one updated.
            offer_id = repriced[-1].offer_id
            print('PRICE CHANGED!')  # CHANGE PRICE
            print('offer id: ' + str(offer_id))
            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/'
                         + str(new_offer.price),
                         headers=headers, timeout=REQUEST_TIMEOUT)

    # NOTE(review): indentation was lost in the source listing; the separator is
    # assumed to be printed once, between the add/update phase and the delete phase.
    print('------------------------------------')

    scraped_codes = {new_offer.offer_shop_code for new_offer in new_offers}
    for old_offer in database_offers:
        if old_offer.offer_shop_code not in scraped_codes:
            print('OFFER DELETED')
            print(old_offer)
            # DELETE OFFER
            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/'
                            + str(old_offer.offer_id),
                            timeout=REQUEST_TIMEOUT)


# Outcome written to scrapper_info; only flipped to 'success' when every step ran.
status = 'failed'
try:
    # Setec phone offers that are already in database
    database_offers = _load_database_offers()

    new_offers = []
    for i in range(1, 9):  # listing pages 1..8
        new_offers.extend(_scrape_page(i))

    _sync_offers(new_offers, database_offers)
    status = 'success'
except Exception:
    traceback.print_exc()
finally:
    # Record the run exactly once, and release the cursor and connection even
    # if the INSERT itself fails.
    try:
        insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
                        ' VALUES (%s, %s, %s);'
        insert_value = (offer_shop, last_updated, status)
        cur.execute(insert_script, insert_value)
        db_connection.commit()
    finally:
        cur.close()
        db_connection.close()
Note: See TracBrowser for help on using the repository browser.