source: phonelux_scrappers/scrappers/mobelix_scrapper.py@ 895cd87

Last change on this file since 895cd87 was 895cd87, checked in by Marko <Marko@…>, 21 months ago

Refactored code

  • Property mode set to 100644
File size: 7.2 KB
Line 
1import json
2import sys
3import unicodedata
4from datetime import datetime
5
6import psycopg2
7import config_read
8from bs4 import BeautifulSoup
9import requests
10
11# import sys
12from classes.phoneoffer import PhoneOffer
13
# Everything this script prints is redirected into a log file instead of the
# console. The file is opened in "w" mode, so each run overwrites the last log.
file_path = 'outputfile.txt'
# Explicit UTF-8: the printed offers may contain non-ASCII characters, and the
# platform default encoding (e.g. cp1252 on Windows) would make print() raise
# UnicodeEncodeError part-way through the run.
sys.stdout = open(file_path, "w", encoding="utf-8")

# Values shared by every offer created during this run.
offer_shop = "Mobelix"  # offer shop
last_updated = datetime.now().date()  # date of this run (local time)
is_validated = False  # every scraped offer is created with this flag unset
20
# Mobelix phone offers that are already in database.
#
# They are fetched from the local phone-offer service and later compared with
# the freshly scraped offers to decide what to add, re-price or delete.

# The timeout keeps the run from hanging forever if the service never answers;
# raise_for_status() turns an HTTP error into an explicit exception instead of
# a confusing JSON/TypeError failure further down.
offers_response = requests.get('http://localhost:8080/phoneoffer/shop/mobelix', timeout=30)
offers_response.raise_for_status()

# The response text is NFKD-normalised before it is parsed as JSON.
offers = json.loads(unicodedata.normalize('NFKD', offers_response.text))

database_offers = []

for offer in offers:
    # Stored offers carry their database id as the first constructor argument.
    phoneOffer = PhoneOffer(
        offer['id'],
        offer['offer_shop'],
        offer['offer_name'],
        offer['price'],
        offer['ram_memory'],
        offer['rom_memory'],
        offer['color'],
        offer['front_camera'],
        offer['back_camera'],
        offer['chipset'],
        offer['battery'],
        offer['operating_system'],
        offer['cpu'],
        offer['image_url'],
        offer['offer_url'],
        offer['last_updated'],
        offer['is_validated'],
        offer['offer_description'],
        offer['offer_shop_code'],
    )
    database_offers.append(phoneOffer)
37
# Scrape the Mobelix listing pages and build one PhoneOffer per listed phone.
#
# For every listing page (1..16) each product card is read for its URL, image,
# brand, name and price; the product's own page is then fetched for the
# available colours and the technical specification table.
new_offers = []

# Cards whose name contains one of these words are skipped (presumably
# watches, tablets and earbuds that share the listing with the phones).
non_phone_keywords = ('Watch', 'Pad', 'Tab', 'Pods', 'Buds', 'HomePod')

for i in range(1, 17):
    mobelix_url = "https://mobelix.com.mk/mk/mobilni-telefoni?page=" + str(i)

    # The timeout stops a stalled connection from blocking the run forever.
    response1 = requests.get(mobelix_url, timeout=30)
    soup1 = BeautifulSoup(response1.content, 'html.parser')

    phones = soup1.find_all('div', {'class': 'p-2 rounded text-dark bg-white d-flex w-100'})

    for phone in phones:
        offer_url = phone.find('a').get('href')

        # Look the card's "col-12" columns up once: the first one holds the
        # image, the second one the brand, name and price.
        columns = phone.find_all('div', {'class': 'col-12'})
        image_url = columns[0].find('img').get('src')
        details = columns[1]
        brand = details.find('h5', {'class': 'mb-0'}).get_text().strip()
        offer_name = details.find('h3', {'class': 'h5 font-weight-normal'}).get_text().strip()

        if any(keyword in offer_name for keyword in non_phone_keywords):
            continue

        # Make sure the offer name always starts with / contains the brand.
        if brand not in offer_name:
            offer_name = brand + " " + offer_name

        # The price element may yield several text fragments (joined with '/').
        # When there is more than one, the second fragment is the one used
        # (presumably the current price after an old/crossed-out one).
        price_parts = details.find('p', {'class': 'h5 price'}) \
            .get_text(separator='/').strip().split('/')
        raw_price = price_parts[1] if len(price_parts) > 1 else price_parts[0]
        price = int(float(raw_price.replace(',', '').replace('ден', '').strip()))

        response2 = requests.get(offer_url, timeout=30)
        soup2 = BeautifulSoup(response2.content, 'html.parser')

        colors_divs = soup2.find('div', {'class': 'color-wrapper mt-2 mb-1'}) \
            .find_all('div', {'class': 'color-box d-inline-block'})  # color div tags

        color = ",".join(div.get('title') for div in colors_divs)  # available colors for offer

        tables = soup2.find('div', {'class': 'mobelix-specs table-white bordered-table'}).find_all('table')

        operating_system = None
        chipset = None
        battery = None
        ram_memory = None
        rom_memory = None
        front_camera = ''
        back_camera = ''
        cpu = None
        offer_shop_code = None
        offer_description = None

        for table in tables:
            for cell in table.find_all('td'):
                spec = cell.get('data-spec')
                if spec is None:
                    continue

                # NFKD-normalised, whitespace-trimmed text of the spec cell.
                spec_text = unicodedata.normalize('NFKD', cell.get_text().strip())

                if spec == 'os':
                    operating_system = spec_text
                elif spec == 'chipset':
                    chipset = spec_text
                elif spec == 'cpu':
                    cpu = spec_text
                elif spec == 'internalmemory':
                    # Comma separated variants; within each variant the first
                    # space separated token is stored as ROM and the second
                    # (when present) as RAM.
                    temp_rom = []
                    temp_ram = []
                    for internalmemory in spec_text.split(','):
                        memory_parts = internalmemory.strip().split(' ')
                        temp_rom.append(memory_parts[0])
                        if len(memory_parts) > 1:
                            temp_ram.append(memory_parts[1])
                    rom_memory = ','.join(temp_rom)
                    ram_memory = ','.join(temp_ram)
                elif spec in ('cam1modules', 'cam1features', 'cam1video'):
                    back_camera += spec_text + '\n'
                elif spec in ('cam2modules', 'cam2features', 'cam2video'):
                    front_camera += spec_text + '\n'
                elif spec == 'batdescription1':
                    battery = spec_text

        # Every camera cell is appended with a trailing newline, so the text
        # has to be stripped before comparing: a plain == 'No' could never
        # match and "No" cameras were stored as the string 'No\n'.
        if front_camera.strip() == 'No':
            front_camera = None

        if back_camera.strip() == 'No':
            back_camera = None

        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
                                     color, front_camera, back_camera, chipset, battery,
                                     operating_system, cpu, image_url, offer_url,
                                     last_updated, is_validated, offer_description,
                                     offer_shop_code))
140
141
# Push the scraped offers to the phone-offer service: an offer whose name is
# not stored yet is added, an offer that is stored with a different price gets
# its price changed, and an unchanged offer is left alone.
for new_offer in new_offers:
    # Stored offers that carry the same name as the scraped one.
    same_name = [stored for stored in database_offers
                 if new_offer.offer_name == stored.offer_name]

    if not same_name:
        print('ADDED')  # ADD OFFER
        print(new_offer)
        headers = {'Content-type': 'application/json'}
        payload = json.dumps(new_offer.__dict__, default=str)
        requests.post('http://localhost:8080/phoneoffer/addoffer',
                      headers=headers, data=payload)
        continue

    # Already in database: only the price is checked and, if needed, updated.
    repriced = [stored for stored in same_name if new_offer.price != stored.price]
    if repriced:
        # With several differing matches the last one in database order wins.
        offer_id = repriced[-1].offer_id
        print('PRICE CHANGED!')  # CHANGE PRICE
        print('offer id: ' + str(offer_id))
        headers = {'Content-type': 'application/json'}
        change_price_url = ('http://localhost:8080/phoneoffer/' + str(offer_id)
                            + '/changeprice/' + str(new_offer.price))
        requests.put(change_price_url, headers=headers)
171
print('------------------------------------')

# Delete stored offers that no longer appear among the scraped offers.
# Offers are matched by name only, so a set of scraped names gives a constant
# time membership test instead of rescanning new_offers for every stored offer.
scraped_names = {scraped.offer_name for scraped in new_offers}

if not new_offers:
    # Safety guard: when the scrape produced nothing (site unreachable, markup
    # changed, ...) every stored offer would look "missing" and be deleted.
    # Skip the clean-up rather than wiping the shop's offers.
    print('NO OFFERS SCRAPED - SKIPPING DELETION')
else:
    for old_offer in database_offers:
        if old_offer.offer_name not in scraped_names:
            print('OFFER DELETED')
            print(old_offer)
            # DELETE OFFER
            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
Note: See TracBrowser for help on using the repository browser.