# source: phonelux_scrappers/scrappers/a1_scrapper.py @ 895cd87
#
# Last change on this file since 895cd87 was 895cd87, checked in by Marko <Marko@…>, 21 months ago
#
# Refactored code
#
#   • Property mode set to 100644
# File size: 6.1 KB
import atexit
import json
import sys
import unicodedata
from datetime import datetime

import psycopg2
import requests
from bs4 import BeautifulSoup

import config_read
from classes.phoneoffer import PhoneOffer

# Everything this script prints is redirected into a log file.
file_path = 'outputfile.txt'
# The encoding is pinned to UTF-8 because the scraped pages are in Macedonian,
# so printed offers may contain Cyrillic text that the platform's default
# encoding cannot represent.
sys.stdout = open(file_path, "w", encoding="utf-8")
# Flush and close the log file when the interpreter exits.
atexit.register(sys.stdout.close)

offer_shop = "A1"  # offer shop
last_updated = datetime.now().date()  # date stamped on every offer scraped in this run
is_validated = False  # value passed as is_validated for every newly scraped offer

# A1 phone offers that are already in database

db_response = requests.get('http://localhost:8080/phoneoffer/shop/a1', timeout=30)
# Fail with a clear HTTP error instead of a confusing JSON/KeyError further down.
db_response.raise_for_status()

# The response text is NFKD-normalised before it is decoded as JSON.
offers = json.loads(unicodedata.normalize('NFKD', db_response.text))

# One PhoneOffer per stored record; the arguments are positional, so their
# order must match the PhoneOffer constructor.
database_offers = [
    PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
               offer['ram_memory'],
               offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
               offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
               offer['image_url'],
               offer['offer_url'], offer['last_updated'], offer['is_validated'],
               offer['offer_description'],
               offer['offer_shop_code'])
    for offer in offers
]

a1_url = 'https://www.a1.mk/webshop/mk/phones'

# Label text of a row in the product page's specification table, mapped to the
# offer field that row fills. Labels are matched as substrings of the row text.
_SPEC_LABELS = {
    'Камера': 'back_camera',
    'Оперативен систем': 'operating_system',
    'CPU': 'cpu',
    'Вградена меморија': 'rom_memory',
    'RAM меморија': 'ram_memory',
    'Батерија': 'battery',
    'Предна камера': 'front_camera',
}


def _parse_device_price(price_cells):
    """Return the device-only price as an int, or None if no cell carries it."""
    price = None
    for cell in price_cells:
        text = cell.get_text()
        if 'Цена само за уред' in text:
            # Strip the label, the button caption, the currency word and the
            # thousands separator, leaving only the digits.
            price = int(text.replace('Цена само за уред', '')
                        .replace('Одбери', '').replace('денари', '').replace('.', '').strip())
    return price


def _extract_specs(table_rows):
    """Return a dict of spec field name -> text value (None when the row is absent)."""
    specs = dict.fromkeys(_SPEC_LABELS.values())
    for row in table_rows:
        text = row.get_text()  # computed once per row instead of once per label check
        for label, field in _SPEC_LABELS.items():
            if label in text:
                specs[field] = text.replace(label, '').strip()
    return specs


def _scrape_offer(phone):
    """Build a PhoneOffer from one listing tile plus its product detail page."""
    brand = phone.get('data-brand').strip()
    offer_name = brand + " " + phone.get('data-model').strip()

    offer_shop_code = phone.get('data-productid').strip()
    offer_url = phone.find('a', {'class': 'device-link'}).get('href')
    image_url = phone.get('data-image')

    detail_response = requests.get(offer_url, timeout=30)
    detail_response.raise_for_status()
    detail_soup = BeautifulSoup(detail_response.content, 'html.parser')

    price_cells = detail_soup.find('div', {'class': 'ured-tabs-content'}) \
        .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div')
    price = _parse_device_price(price_cells)

    color_labels = detail_soup.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label')
    # Labels without a data-content attribute are skipped; joining a None
    # value would raise TypeError.
    available_colors = [label.get('data-content') for label in color_labels]
    color = ','.join(name for name in available_colors if name)  # colors available for the offer

    table_rows = detail_soup.find('table', {'class': 'table karakteristiki'}).find_all('tr')
    specs = _extract_specs(table_rows)

    # Nothing on the page is mapped to these two fields, so they are sent as None.
    # NOTE(review): the page has a 'desc section' paragraph that was previously
    # scraped but never stored - confirm whether offer_description should carry it.
    chipset = None
    offer_description = None

    return PhoneOffer(offer_shop, offer_name, price, specs['ram_memory'], specs['rom_memory'],
                      color, specs['front_camera'], specs['back_camera'], chipset, specs['battery'],
                      specs['operating_system'], specs['cpu'], image_url,
                      offer_url, last_updated, is_validated, offer_description, offer_shop_code)


response1 = requests.get(a1_url, timeout=30)
response1.raise_for_status()
soup1 = BeautifulSoup(response1.content, 'html.parser')

# attrs must be a dict ({'class': value}); a set literal only works by accident
# because BeautifulSoup treats any non-dict attrs value as a CSS-class search.
phones = soup1.find('main', {'class': 'gsm-advisor-grid phones'}).find('div', {'class': 'd-flex'}) \
    .find_all('div', {'class': 'dvc-idtfr by4'})

# One detail-page request is made per listed phone.
new_offers = [_scrape_offer(phone) for phone in phones]

# Group the stored offers by shop code once, so each scraped offer is matched
# with a dictionary lookup instead of a scan over every stored offer.
old_offers_by_code = {}
for old_offer in database_offers:
    old_offers_by_code.setdefault(old_offer.offer_shop_code, []).append(old_offer)

json_headers = {'Content-type': 'application/json'}

for new_offer in new_offers:
    matching_offers = old_offers_by_code.get(new_offer.offer_shop_code)

    if not matching_offers:
        print('ADDED')  # ADD OFFER
        print(new_offer)
        # default=str stringifies any attribute json cannot encode natively
        # (for example the last_updated date).
        requests.post('http://localhost:8080/phoneoffer/addoffer', headers=json_headers,
                      data=json.dumps(new_offer.__dict__, default=str), timeout=30)
        continue

    # Already in database: only the price is kept in sync.
    if new_offer.price is None:
        # No price was found on the page; never call .../changeprice/None.
        continue

    stale_offers = [old for old in matching_offers if old.price != new_offer.price]
    if stale_offers:
        # If several stored offers share the code, the last one whose price
        # differs is the one that gets updated.
        offer_id = stale_offers[-1].offer_id
        print('PRICE CHANGED!')  # CHANGE PRICE
        print('offer id: ' + str(offer_id))
        requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
                     headers=json_headers, timeout=30)

print('------------------------------------')

# Shop codes seen in this scrape; a set makes each membership test O(1).
scraped_codes = {new_offer.offer_shop_code for new_offer in new_offers}

# Any stored offer whose shop code was not scraped in this run is deleted.
# NOTE(review): if the listing page yields no phones, every stored offer is
# deleted - confirm that this is intended.
for old_offer in database_offers:
    if old_offer.offer_shop_code in scraped_codes:
        continue

    print('OFFER DELETED')
    print(old_offer)
    # DELETE OFFER
    requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id), timeout=30)
# Note: See TracBrowser for help on using the repository browser.