source: phonelux_scrappers/scrappers/mobilezone_scrapper.py

Last change on this file was 47f4eaf, checked in by Marko <Marko@…>, 20 months ago

Final features implemented

  • Property mode set to 100644
File size: 7.3 KB
Line 
1import json
2import traceback
3import unicodedata
4from datetime import datetime
5import psycopg2
6import config_read
7from bs4 import BeautifulSoup
8from selenium import webdriver
9import requests
10import sys
11
12from classes.phoneoffer import PhoneOffer
13
# Every print() in this run is written to this file instead of the console.
# The encoding is set explicitly: the scraped pages contain Cyrillic text,
# so printed offers may too, and a non-UTF-8 platform default encoding
# would make print() raise UnicodeEncodeError part-way through the run.
file_path = 'outputfile.txt'
sys.stdout = open(file_path, "w", encoding="utf-8")

# Values stamped on every offer created by this run.
offer_shop = "Mobile Zone"  # offer shop
last_updated = datetime.now().date()
is_validated = False

# Read the configuration file and connect to the database.
# The path is relative, so it is resolved against the current working
# directory of the process.
# cinfo is indexed positionally: 0 = database, 1 = host, 2 = user, 3 = password.
cinfo = config_read.get_databaseconfig("../postgresdb.config")
db_connection = psycopg2.connect(
    database=cinfo[0],
    host=cinfo[1],
    user=cinfo[2],
    password=cinfo[3]
)
cur = db_connection.cursor()
30
# Seconds to wait on any single HTTP request. Without a timeout, requests
# waits indefinitely on a stalled server and the run never finishes.
REQUEST_TIMEOUT = 30

API_URL = 'http://localhost:8080/phoneoffer'
LISTING_URL = 'https://mobilezone.mk/produkt-kategorija/telefoni/novi-telefoni/page/'
JSON_HEADERS = {'Content-type': 'application/json'}

# Exact class attribute values used to locate elements on the shop pages.
PRODUCT_LIST_CLASS = ('products columns-tablet-2 columns-mobile-2 --skin-proto rey-wcGap-default rey-wcGrid-default '
                      '--paginated columns-4')
PRODUCT_LINK_CLASS = 'woocommerce-LoopProduct-link woocommerce-loop-product__link'
SPEC_TABLE_CLASS = 'woocommerce-product-attributes shop_attributes'

# Text looked for in a specification row header -> offer field it fills.
SPEC_LABELS = {
    'Главна камера': 'back_camera',
    'Селфи камера': 'front_camera',
    'Батерија': 'battery',
    'Меморија': 'rom_memory',
    'Боја': 'color',
}


def _load_database_offers():
    """Return the Mobile Zone offers already known to the local API as PhoneOffer objects."""
    response = requests.get(API_URL + '/shop/mobilezone', timeout=REQUEST_TIMEOUT)
    offers = json.loads(unicodedata.normalize('NFKD', response.text))

    return [
        PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
                   offer['ram_memory'],
                   offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
                   offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
                   offer['image_url'],
                   offer['offer_url'], offer['last_updated'], offer['is_validated'],
                   offer['offer_description'],
                   offer['offer_shop_code'])
        for offer in offers
    ]


def _scrape_specifications(offer_url):
    """Fetch a product page and return its recognised specification values.

    The result maps every field named in SPEC_LABELS to the stripped text of
    the matching table cell, or to None when no row header contains the
    label. When several rows match the same label, the last one wins.

    Raises AttributeError if the page has no specification table (or a row
    has no header cell); the caller's top-level handler records the run as
    failed in that case.
    """
    specs = dict.fromkeys(SPEC_LABELS.values())

    response = requests.get(offer_url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'html.parser')
    rows = soup.find('table', {'class': SPEC_TABLE_CLASS}).find_all('tr')

    for row in rows:
        header_text = row.find('th').get_text()
        for label, field in SPEC_LABELS.items():
            if label in header_text:
                specs[field] = row.find('td').get_text().strip()

    return specs


def _scrape_listing_page(page_number):
    """Scrape one listing page and return a PhoneOffer for every priced product on it.

    Products without a price element are skipped before their product page
    is requested. Each remaining product costs one extra HTTP request for
    its specification table.
    """
    response = requests.get(LISTING_URL + str(page_number) + '/', timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.content, 'html.parser')
    phones = soup.find('ul', {'class': PRODUCT_LIST_CLASS}).find_all('li')

    # Fields that neither the listing nor the product page is scraped for.
    ram_memory = None
    operating_system = None
    cpu = None
    chipset = None
    offer_description = None
    offer_shop_code = None

    page_offers = []

    for phone in phones:
        product_link = phone.find('a', {'class': PRODUCT_LINK_CLASS})
        offer_url = product_link.get('href')
        image_url = product_link.find('img').get('data-lazy-src')

        brand_section = phone.find('div', {'class': 'rey-productInner'}).find('div', {'class': 'rey-brandLink'})
        brand = brand_section.find('a').get_text().strip() if brand_section is not None else None

        offer_name = phone.find('h2', {'class': 'woocommerce-loop-product__title'}).find('a').get_text().strip()

        # Prefix the brand only when the title does not already mention it.
        if brand is not None and brand not in offer_name:
            offer_name = brand + ' ' + offer_name

        price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'})
        if price_tag is None:
            continue

        # Drop the thousands separator and the currency marker, then
        # normalise so any compatibility whitespace left over does not
        # get in the way of int().
        price = int(unicodedata.normalize('NFKD', price_tag.find('bdi').get_text()
                                          .replace(',', '')
                                          .replace('ден', '').strip()))

        specs = _scrape_specifications(offer_url)

        page_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, specs['rom_memory'],
                                      specs['color'], specs['front_camera'], specs['back_camera'], chipset,
                                      specs['battery'], operating_system, cpu,
                                      image_url,
                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))

    return page_offers


def _sync_offers(database_offers, new_offers):
    """Bring the stored offers in line with the freshly scraped ones.

    Offers are matched by offer_name only:
      * scraped name not stored           -> POST the offer to the API;
      * scraped name stored, price differs -> PUT the new price;
      * stored name not scraped            -> DELETE the stored offer.
    """
    # Index stored offers by name, keeping their original order per name.
    stored_by_name = {}
    for stored in database_offers:
        stored_by_name.setdefault(stored.offer_name, []).append(stored)

    for new_offer in new_offers:
        same_name = stored_by_name.get(new_offer.offer_name)

        if same_name is None:
            print('ADDED')  # ADD OFFER
            print(new_offer)
            requests.post(API_URL + '/addoffer',
                          headers=JSON_HEADERS, data=json.dumps(new_offer.__dict__, default=str),
                          timeout=REQUEST_TIMEOUT)
            continue

        # Already stored: only the price is checked. When several stored
        # offers share the name, the last one with a different price is
        # the one that gets updated.
        repriced = [stored for stored in same_name if stored.price != new_offer.price]
        if repriced:
            offer_id = repriced[-1].offer_id
            print('PRICE CHANGED!')  # CHANGE PRICE
            print('offer id: ' + str(offer_id))
            requests.put(API_URL + '/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
                         headers=JSON_HEADERS, timeout=REQUEST_TIMEOUT)

    print('------------------------------------')

    scraped_names = {new_offer.offer_name for new_offer in new_offers}
    for stored in database_offers:
        if stored.offer_name not in scraped_names:
            print('OFFER DELETED')
            print(stored)
            # DELETE OFFER
            requests.delete(API_URL + '/deleteoffer/' + str(stored.offer_id), timeout=REQUEST_TIMEOUT)


def _record_run_status(status):
    """Insert one scrapper_info row for this run with the given status and commit it."""
    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
                    ' VALUES (%s, %s, %s);'
    insert_value = (offer_shop, last_updated, status)
    cur.execute(insert_script, insert_value)
    db_connection.commit()


try:
    # Mobile Zone phone offers that are already in database
    database_offers = _load_database_offers()

    # Only the first two listing pages are scraped.
    new_offers = []
    for page_number in range(1, 3):
        new_offers.extend(_scrape_listing_page(page_number))

    _sync_offers(database_offers, new_offers)
except Exception:
    # Top-level boundary: any failure is printed and recorded as a failed run.
    traceback.print_exc()
    _record_run_status('failed')
else:
    _record_run_status('success')
finally:
    # Release the database resources even when recording the status fails.
    cur.close()
    db_connection.close()
Note: See TracBrowser for help on using the repository browser.