source: phonelux_scrappers/scrappers/a1_scrapper.py@ 47f4eaf

Last change on this file since 47f4eaf was 47f4eaf, checked in by Marko <Marko@…>, 2 years ago

Final features implemented

  • Property mode set to 100644
File size: 7.4 KB
Line 
1import traceback
2import unicodedata
3from datetime import datetime
4import json
5import psycopg2
6import config_read
7from bs4 import BeautifulSoup
8import requests
9import sys
10import unicodedata
11
12from classes.phoneoffer import PhoneOffer
13
# Everything this script print()s is redirected into this file. The file is
# opened explicitly as UTF-8: the scraped shop pages are Macedonian (Cyrillic),
# and the platform default encoding (e.g. cp1252 on Windows) cannot encode that
# text, which would make print() raise UnicodeEncodeError.
file_path = 'outputfile.txt'
sys.stdout = open(file_path, "w", encoding="utf-8")

# Values stamped on every offer produced by this run.
offer_shop = "A1"  # offer shop
last_updated = datetime.now().date()
is_validated = False

# Call to read the configuration file and connect to database.
# cinfo is indexed positionally: 0 = database, 1 = host, 2 = user, 3 = password.
cinfo = config_read.get_databaseconfig("../postgresdb.config")
db_connection = psycopg2.connect(
    database=cinfo[0],
    host=cinfo[1],
    user=cinfo[2],
    password=cinfo[3],
)
cur = db_connection.cursor()
30
# Seconds to wait on any single HTTP call before giving up, so that one
# unresponsive server cannot hang the whole scraping run.
REQUEST_TIMEOUT = 30

JSON_HEADERS = {'Content-type': 'application/json'}

# Label shown in a row of A1's specification table -> offer field it fills.
# Labels are matched case-sensitively as substrings of the row text.
SPEC_LABELS = {
    'Камера': 'back_camera',
    'Оперативен систем': 'operating_system',
    'CPU': 'cpu',
    'Вградена меморија': 'rom_memory',
    'RAM меморија': 'ram_memory',
    'Батерија': 'battery',
    'Предна камера': 'front_camera',
}


def _record_run(status):
    """Insert this run's outcome into scrapper_info, then close the DB handles.

    The cursor and connection are closed even when the insert or commit
    raises, so a failing bookkeeping write does not leak the connection.
    """
    try:
        cur.execute(
            'INSERT INTO scrapper_info (store, recieved_at, status)'
            ' VALUES (%s, %s, %s);',
            (offer_shop, last_updated, status),
        )
        db_connection.commit()
    finally:
        cur.close()
        db_connection.close()


def _load_database_offers():
    """Return the A1 offers the local backend already stores, as PhoneOffer objects."""
    response = requests.get('http://localhost:8080/phoneoffer/shop/a1',
                            timeout=REQUEST_TIMEOUT)
    stored = json.loads(unicodedata.normalize('NFKD', response.text))
    return [
        PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
                   offer['ram_memory'],
                   offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
                   offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
                   offer['image_url'],
                   offer['offer_url'], offer['last_updated'], offer['is_validated'],
                   offer['offer_description'],
                   offer['offer_shop_code'])
        for offer in stored
    ]


def _parse_price(details):
    """Return the device-only price from an offer page as an int, or None if absent."""
    price_cells = details.find('div', {'class': 'ured-tabs-content'}) \
        .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}) \
        .find_all('div')

    price = None
    for cell in price_cells:
        text = cell.get_text()
        if 'Цена само за уред' in text:
            # Strip the label, the button caption and the currency word, then
            # drop the '.' thousands separators before converting.
            price = int(text.replace('Цена само за уред', '')
                        .replace('Одбери', '').replace('денари', '').replace('.', '').strip())
    return price


def _parse_specs(details):
    """Return a dict of the SPEC_LABELS fields read from the specification table.

    Fields whose label does not occur in any row stay None.
    """
    specs = dict.fromkeys(SPEC_LABELS.values())
    for row in details.find('table', {'class': 'table karakteristiki'}).find_all('tr'):
        text = row.get_text()
        for label, field in SPEC_LABELS.items():
            if label in text:
                specs[field] = text.replace(label, '').strip()
    return specs


def _scrape_offer(phone):
    """Build a PhoneOffer from one listing tile plus its offer detail page."""
    brand = phone.get('data-brand').strip()
    offer_name = brand + " " + phone.get('data-model').strip()
    offer_shop_code = phone.get('data-productid').strip()
    offer_url = phone.find('a', {'class': 'device-link'}).get('href')
    image_url = phone.get('data-image')

    page = requests.get(offer_url, timeout=REQUEST_TIMEOUT)
    details = BeautifulSoup(page.content, 'html.parser')

    price = _parse_price(details)

    # Colours available for the offer. Labels without a data-content attribute
    # are skipped; a None entry would make str.join raise TypeError.
    colour_labels = details.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}) \
        .find_all('label')
    colours = (label.get('data-content') for label in colour_labels)
    color = ','.join(name for name in colours if name is not None)

    specs = _parse_specs(details)

    # This page is not used to fill the chipset or the description.
    # NOTE(review): the previous version also read the text of the
    # 'desc section' paragraph but never used it; if that text was meant to
    # become offer_description, wire it in here.
    chipset = None
    offer_description = None

    return PhoneOffer(offer_shop, offer_name, price, specs['ram_memory'], specs['rom_memory'],
                      color, specs['front_camera'], specs['back_camera'], chipset,
                      specs['battery'], specs['operating_system'], specs['cpu'],
                      image_url,
                      offer_url, last_updated, is_validated, offer_description, offer_shop_code)


def _sync_offers(new_offers, database_offers):
    """Push the difference between scraped and stored offers to the backend.

    Offers are matched on offer_shop_code: a scraped offer with no stored
    match is added, a matched offer whose price differs gets a price change,
    and a stored offer that was not scraped is deleted.
    """
    stored_by_code = {}
    for old_offer in database_offers:
        stored_by_code.setdefault(old_offer.offer_shop_code, []).append(old_offer)

    for new_offer in new_offers:
        matches = stored_by_code.get(new_offer.offer_shop_code)
        if matches:
            # Already in the database: only the price can need updating.
            # NOTE(review): when several stored offers share a shop code, the
            # last one with a differing price is updated - confirm this
            # matches the nesting of the original loop.
            repriced = [old for old in matches if old.price != new_offer.price]
            if repriced:
                offer_id = repriced[-1].offer_id
                print('PRICE CHANGED!')  # CHANGE PRICE
                print('offer id: ' + str(offer_id))
                requests.put(
                    'http://localhost:8080/phoneoffer/' + str(offer_id)
                    + '/changeprice/' + str(new_offer.price),
                    headers=JSON_HEADERS, timeout=REQUEST_TIMEOUT)
        else:
            print('ADDED')  # ADD OFFER
            print(new_offer)
            requests.post('http://localhost:8080/phoneoffer/addoffer', headers=JSON_HEADERS,
                          data=json.dumps(new_offer.__dict__, default=str),
                          timeout=REQUEST_TIMEOUT)

    # NOTE(review): assumed to be a separator printed once, between the
    # add/update phase and the delete phase - confirm against the original file.
    print('------------------------------------')

    scraped_codes = {new_offer.offer_shop_code for new_offer in new_offers}
    for old_offer in database_offers:
        if old_offer.offer_shop_code not in scraped_codes:
            print('OFFER DELETED')
            print(old_offer)
            # DELETE OFFER
            requests.delete(
                'http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id),
                timeout=REQUEST_TIMEOUT)


try:
    # A1 phone offers that are already in database
    database_offers = _load_database_offers()

    a1_url = 'https://www.a1.mk/webshop/mk/phones'
    listing = BeautifulSoup(requests.get(a1_url, timeout=REQUEST_TIMEOUT).content, 'html.parser')

    phones = listing.find('main', {'class': 'gsm-advisor-grid phones'}) \
        .find('div', {'class': 'd-flex'}) \
        .find_all('div', {'class': 'dvc-idtfr by4'})

    # An empty listing would make the sync below delete every stored A1 offer,
    # so treat it as a failed run instead.
    if not phones:
        raise RuntimeError('no phone offers found on the A1 listing page')

    new_offers = [_scrape_offer(phone) for phone in phones]

    _sync_offers(new_offers, database_offers)
except Exception:
    # Top-level boundary of the script: report the error and record the run
    # as failed instead of crashing without a scrapper_info row.
    traceback.print_exc()
    _record_run('failed')
else:
    _record_run('success')
Note: See TracBrowser for help on using the repository browser.