source: phonelux_scrappers/scrappers/mobilezone_scrapper.py@ ffd50db

Last change on this file since ffd50db was 895cd87, checked in by Marko <Marko@…>, 21 months ago

Refactored code

  • Property mode set to 100644
File size: 6.0 KB
Line 
1import json
2import unicodedata
3from datetime import datetime
4import psycopg2
5import config_read
6from bs4 import BeautifulSoup
7from selenium import webdriver
8import requests
9import sys
10
11from classes.phoneoffer import PhoneOffer
12
# Everything the script prints is redirected into this file for the rest of the run.
file_path = 'outputfile.txt'
# UTF-8 is requested explicitly: the script prints scraped offers, and with the
# locale's default encoding a non-ASCII character (the shop's pages are in
# Macedonian) can raise UnicodeEncodeError on systems whose locale is not UTF-8.
sys.stdout = open(file_path, "w", encoding="utf-8")

# Values stamped onto every offer scraped during this run.
offer_shop = "Mobile Zone"  # offer shop
last_updated = datetime.now().date()  # date of this run (local time)
is_validated = False
19
# Mobile Zone phone offers that are already in database.
#
# The response body is NFKD-normalised as a whole before it is parsed as JSON,
# and each returned record is turned into a PhoneOffer.

# Keys read from every record, in the positional order PhoneOffer is called with.
OFFER_FIELDS = (
    'id', 'offer_shop', 'offer_name', 'price',
    'ram_memory',
    'rom_memory', 'color', 'front_camera', 'back_camera',
    'chipset', 'battery', 'operating_system', 'cpu',
    'image_url',
    'offer_url', 'last_updated', 'is_validated',
    'offer_description',
    'offer_shop_code',
)

stored_response = requests.get('http://localhost:8080/phoneoffer/shop/mobilezone')
offers = json.loads(unicodedata.normalize('NFKD', stored_response.text))

database_offers = [
    PhoneOffer(*[record[field] for field in OFFER_FIELDS])
    for record in offers
]
36
# Offers scraped from mobilezone.mk during this run.
new_offers = []

# Exact class attribute of the <ul> that wraps the product cards on a listing page.
PRODUCT_LIST_CLASS = ('products columns-tablet-2 columns-mobile-2 --skin-proto rey-wcGap-default '
                      'rey-wcGrid-default --paginated columns-4')
# Exact class attribute of the <a> that carries a card's product URL and image.
PRODUCT_LINK_CLASS = 'woocommerce-LoopProduct-link woocommerce-loop-product__link'

# Text looked for in a specification row's header -> offer field that row fills.
SPECIFICATION_LABELS = {
    'Главна камера': 'back_camera',
    'Селфи камера': 'front_camera',
    'Батерија': 'battery',
    'Меморија': 'rom_memory',
    'Боја': 'color',
}

# Seconds to wait for the shop's server; without a timeout requests.get() can
# block indefinitely when the remote side stops responding.
REQUEST_TIMEOUT = 30

# Only listing pages 1 and 2 are visited.
for i in range(1, 3):
    mobilezone_url = 'https://mobilezone.mk/produkt-kategorija/telefoni/novi-telefoni/page/' + str(i) + '/'

    response1 = requests.get(mobilezone_url, timeout=REQUEST_TIMEOUT)
    soup1 = BeautifulSoup(response1.content, 'html.parser')

    product_list = soup1.find('ul', {'class': PRODUCT_LIST_CLASS})
    if product_list is None:
        # Abort with a clear message instead of an AttributeError on None.
        # The page is deliberately not skipped: an incomplete scrape would make
        # offers that are still on sale look as if they had disappeared.
        raise RuntimeError('Product list not found on ' + mobilezone_url)

    for phone in product_list.find_all('li'):
        # The same anchor carries both the product URL and the lazy-loaded image.
        product_link = phone.find('a', {'class': PRODUCT_LINK_CLASS})
        offer_url = product_link.get('href')
        image_url = product_link.find('img').get('data-lazy-src')

        brand_section = phone.find('div', {'class': 'rey-productInner'}).find('div', {'class': 'rey-brandLink'})
        brand = brand_section.find('a').get_text().strip() if brand_section is not None else None

        offer_name = phone.find('h2', {'class': 'woocommerce-loop-product__title'}).find('a').get_text().strip()

        # Prefix the brand when the title does not already contain it.
        if brand is not None and brand not in offer_name:
            offer_name = brand + ' ' + offer_name

        price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'})
        if price_tag is None:
            # Cards without a price are not recorded as offers.
            continue

        # Drop the thousands separator and the currency label; NFKD folds
        # compatibility characters before int() parses the remaining digits.
        price_text = price_tag.find('bdi').get_text().replace(',', '').replace('ден', '').strip()
        price = int(unicodedata.normalize('NFKD', price_text))

        response2 = requests.get(offer_url, timeout=REQUEST_TIMEOUT)
        soup2 = BeautifulSoup(response2.text, 'html.parser')

        # A product page without an attribute table still yields an offer; its
        # specification fields simply stay None.
        spec_table = soup2.find('table', {'class': 'woocommerce-product-attributes shop_attributes'})
        specifications = spec_table.find_all('tr') if spec_table is not None else []

        # These fields are never filled from the page by this scraper.
        ram_memory = None
        operating_system = None
        cpu = None
        chipset = None
        offer_description = None
        offer_shop_code = None

        # Every scraped specification starts as None and is filled from matching rows.
        specs = dict.fromkeys(SPECIFICATION_LABELS.values())

        for specification in specifications:
            header = specification.find('th')
            value = specification.find('td')
            if header is None or value is None:
                # Rows that are not a header/value pair carry no specification.
                continue

            header_text = header.get_text()
            # Each label is tested independently, so one row may fill several
            # fields and a later matching row overwrites an earlier one.
            for label, field in SPECIFICATION_LABELS.items():
                if label in header_text:
                    specs[field] = value.get_text().strip()

        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, specs['rom_memory'],
                                     specs['color'], specs['front_camera'], specs['back_camera'], chipset,
                                     specs['battery'], operating_system, cpu,
                                     image_url,
                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
114
# Push the scraped offers to the backend: an offer whose name is not stored yet
# is added, and an offer whose name is stored with a different price has that
# price updated.
JSON_HEADERS = {'Content-type': 'application/json'}

for new_offer in new_offers:
    same_name = [stored for stored in database_offers if stored.offer_name == new_offer.offer_name]

    if not same_name:
        print('ADDED')  # ADD OFFER
        print(new_offer)
        requests.post('http://localhost:8080/phoneoffer/addoffer',
                      headers=JSON_HEADERS, data=json.dumps(new_offer.__dict__, default=str))
        continue

    # Already in database: only a changed price needs to be sent.
    repriced = [stored for stored in same_name if stored.price != new_offer.price]
    if not repriced:
        continue

    # When several stored offers share the name, the last one with a different
    # price is the one that gets updated.
    offer_id = repriced[-1].offer_id
    print('PRICE CHANGED!')  # CHANGE PRICE
    print('offer id: ' + str(offer_id))
    requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
                 headers=JSON_HEADERS)
144
print('------------------------------------')

# Remove stored offers whose name no longer appears among the scraped offers.
# The scraped names are collected once into a set so each stored offer needs a
# single membership test instead of a scan over every scraped offer.
scraped_names = {new_offer.offer_name for new_offer in new_offers}

if not scraped_names:
    # Nothing was scraped at all. Treating that as "every offer disappeared"
    # would delete the shop's whole catalogue from the database, so deletion is
    # skipped for this run.
    print('NO OFFERS SCRAPED - SKIPPING DELETION')
else:
    for old_offer in database_offers:
        if old_offer.offer_name not in scraped_names:
            print('OFFER DELETED')
            print(old_offer)
            # DELETE OFFER
            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
Note: See TracBrowser for help on using the repository browser.