source: phonelux_scrappers/scrappers/handy_scrapper.py@ 895cd87

Last change on this file since 895cd87 was 895cd87, checked in by Marko <Marko@…>, 21 months ago

Refactored code

  • Property mode set to 100644
File size: 5.0 KB
Line 
1import json
2import unicodedata
3from datetime import datetime
4import psycopg2
5import config_read
6from bs4 import BeautifulSoup
7from selenium import webdriver
8import requests
9
10import sys
11
12from classes.phoneoffer import PhoneOffer
13
# Everything this script prints is redirected into a log file.
file_path = 'outputfile.txt'
# Open the log explicitly as UTF-8. The script handles Cyrillic text (for
# example the "ден" currency suffix on prices), and the platform default
# encoding may be unable to represent it, in which case print() would raise
# UnicodeEncodeError part-way through a run.
sys.stdout = open(file_path, "w", encoding="utf-8")


offer_shop = "Handy"  # offer shop
last_updated = datetime.now().date()  # date of this scraper run
is_validated = False  # passed as is_validated for every scraped offer
21
# Handy phone offers that are already in database.
# The timeout keeps the script from hanging indefinitely when the backend is
# unreachable, and raise_for_status() stops the run on an HTTP error instead
# of trying to interpret an error body as a list of offers.
offers_response = requests.get('http://localhost:8080/phoneoffer/shop/handy', timeout=30)
offers_response.raise_for_status()

# The response text is NFKD-normalized before it is parsed as JSON.
offers = json.loads(unicodedata.normalize('NFKD', offers_response.text))

# One PhoneOffer per stored offer; the positional argument order is kept
# exactly as the constructor is called elsewhere in this script, with the
# database id passed first.
database_offers = [
    PhoneOffer(
        offer['id'],
        offer['offer_shop'],
        offer['offer_name'],
        offer['price'],
        offer['ram_memory'],
        offer['rom_memory'],
        offer['color'],
        offer['front_camera'],
        offer['back_camera'],
        offer['chipset'],
        offer['battery'],
        offer['operating_system'],
        offer['cpu'],
        offer['image_url'],
        offer['offer_url'],
        offer['last_updated'],
        offer['is_validated'],
        offer['offer_description'],
        offer['offer_shop_code'],
    )
    for offer in offers
]
37
new_offers = []

# NOTE(review): only this one listing URL is fetched. Presumably "page=6"
# renders every product up to that page - confirm, because offers that are
# missing from new_offers are later removed from the database.
handy_url = 'https://www.handy.mk/telefoni?page=6'

# Seconds to wait for the shop before giving up on a request.
REQUEST_TIMEOUT = 30


def _parse_price(price_text):
    """Convert a displayed price such as '12.990,00ден' to a whole number.

    The currency suffix is removed, '.' is treated as the thousands separator
    and ',' as the decimal separator; the fractional part is truncated.
    """
    cleaned = price_text.strip().replace('ден', '').replace('.', '').replace(',', '.')
    return int(float(cleaned))


def _extract_colors(product_soup):
    """Return the colour labels of a product page joined by ','.

    Returns None when the page has no colour section or no colour picker.
    """
    color_section = product_soup.find('section', {'data-hook': 'product-colors-title-section'})
    if color_section is None:
        return None

    picker = color_section.find('fieldset', {'class': 'ColorPickerbase3548966286__container'})
    if picker is None:
        # The section exists but the picker container was not found; treat
        # the colour as unknown instead of crashing on None.
        return None

    labels = (radio.get('aria-label') for radio in picker.find_all('input', {'type': 'radio'}))
    # Inputs without an aria-label are skipped so that join() never sees None.
    return ','.join(label for label in labels if label is not None)


def _extract_description(product_soup):
    """Return the specification rows of a product page joined by newlines.

    Rows are taken from <li> elements, falling back to <tr> elements when
    there are none. Returns '' when the description section is missing.
    """
    info_section = product_soup.find('div', {'data-hook': 'info-section-description'})
    if info_section is None:
        return ''

    rows = info_section.find_all('li') or info_section.find_all('tr')
    return '\n'.join(unicodedata.normalize('NFKD', row.get_text().strip()) for row in rows)


response1 = requests.get(handy_url, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error: parsing an error page would yield an empty
# product list rather than a visible failure.
response1.raise_for_status()
soup1 = BeautifulSoup(response1.content, 'html.parser')

phones = soup1.find_all('li', {'data-hook': 'product-list-grid-item'})

for phone in phones:
    offer_url = phone.find('a').get('href')
    details = phone.find('div', {'data-hook': 'not-image-container'})
    offer_name = details.find('h3', {'data-hook': 'product-item-name'}).get_text().strip()
    price = _parse_price(
        details.find('div', {'data-hook': "product-item-product-details"})
        .find('span', {'data-hook': 'product-item-price-to-pay'}).get_text()
    )

    response2 = requests.get(offer_url, timeout=REQUEST_TIMEOUT)
    response2.raise_for_status()
    soup2 = BeautifulSoup(response2.text, 'html.parser')

    # These fields are not scraped from the page; they are passed as None.
    back_camera = None
    operating_system = None
    chipset = None
    battery = None
    ram_memory = None
    rom_memory = None
    cpu = None
    front_camera = None
    offer_shop_code = None
    image_url = None

    color = _extract_colors(soup2)
    offer_description = _extract_description(soup2)

    new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
                                 color, front_camera, back_camera, chipset, battery,
                                 operating_system, cpu, image_url,
                                 offer_url, last_updated, is_validated, offer_description,
                                 offer_shop_code))
95
# Reconcile every scraped offer with the database: an offer whose name is
# unknown is added, and a known offer whose price differs gets its price
# updated through the backend.
json_headers = {'Content-type': 'application/json'}

for new_offer in new_offers:
    # Stored offers that carry the same name as the scraped one.
    same_name = [known for known in database_offers
                 if new_offer.offer_name == known.offer_name]

    if not same_name:
        print('ADDED')  # ADD OFFER
        print(new_offer)
        requests.post('http://localhost:8080/phoneoffer/addoffer',
                      headers=json_headers,
                      data=json.dumps(vars(new_offer), default=str))
        continue

    # Already in database: only the price may need changing.
    repriced_ids = [known.offer_id for known in same_name
                    if new_offer.price != known.price]
    if not repriced_ids:
        continue

    # When several stored offers qualify, the last one is the one updated.
    offer_id = repriced_ids[-1]
    print('PRICE CHANGED!')  # CHANGE PRICE
    print('offer id: {}'.format(offer_id))
    requests.put(
        'http://localhost:8080/phoneoffer/{}/changeprice/{}'.format(offer_id, new_offer.price),
        headers=json_headers)
125
print('------------------------------------')

# Remove database offers that no longer appear in the scraped results.
# The scraped names are collected once so each stored offer is checked with a
# set lookup instead of a scan over every scraped offer.
scraped_names = {scraped.offer_name for scraped in new_offers}

if not new_offers:
    # Nothing was scraped at all. Deleting now would remove every stored
    # offer for this shop, so the deletion step is skipped for this run.
    print('NO OFFERS SCRAPED - SKIPPING DELETION')
else:
    for old_offer in database_offers:
        if old_offer.offer_name not in scraped_names:
            print('OFFER DELETED')
            print(old_offer)
            # DELETE OFFER
            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
139
140
Note: See TracBrowser for help on using the repository browser.