source: phonelux_scrappers/scrappers/mobitech_scrapper.py@ 47f4eaf

Last change on this file since 47f4eaf was 47f4eaf, checked in by Marko <Marko@…>, 2 years ago

Final features implemented

  • Property mode set to 100644
File size: 7.2 KB
RevLine 
[895cd87]1import json
[47f4eaf]2import traceback
[895cd87]3import unicodedata
[b68ae8d]4from datetime import datetime
5
6import psycopg2
7import config_read
8from bs4 import BeautifulSoup
9import requests
[895cd87]10import sys
[b68ae8d]11
[895cd87]12from classes.phoneoffer import PhoneOffer
13
# Everything this script prints is redirected into a log file.
# The encoding is pinned to UTF-8 because the printed offers are scraped from a
# Macedonian-language site and may contain Cyrillic text; with the platform
# default encoding (e.g. cp1252 on Windows) print() could raise
# UnicodeEncodeError.
file_path = 'outputfile.txt'
sys.stdout = open(file_path, "w", encoding="utf-8")

# Values shared by every offer produced during this run.
offer_shop = "Mobitech"  # offer shop
last_updated = datetime.now().date()
is_validated = False

# Call to read the configuration file and connect to database.
# NOTE(review): the config path is relative, so it is resolved against the
# current working directory of the process - confirm how the script is started.
# The code below reads cinfo as (database, host, user, password).
cinfo = config_read.get_databaseconfig("../postgresdb.config")
db_connection = psycopg2.connect(
    database=cinfo[0],
    host=cinfo[1],
    user=cinfo[2],
    password=cinfo[3]
)
cur = db_connection.cursor()
30
# Seconds to wait on any single HTTP request. requests applies no timeout by
# default, so without this a stalled server would hang the scrapper forever.
REQUEST_TIMEOUT = 30

# Base address of the local phone-offer REST API used to read and update offers.
API_URL = 'http://localhost:8080/phoneoffer'
JSON_HEADERS = {'Content-type': 'application/json'}


def _record_run_status(status):
    """Insert one row into scrapper_info describing how this run ended."""
    # "recieved_at" is spelled this way on purpose: it is the column name used
    # by the original statement.
    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
                    ' VALUES (%s, %s, %s);'
    cur.execute(insert_script, (offer_shop, last_updated, status))
    db_connection.commit()


def _load_database_offers():
    """Return the Mobitech offers currently stored behind the REST API."""
    response = requests.get(API_URL + '/shop/mobitech', timeout=REQUEST_TIMEOUT)
    # An error body must not be mistaken for an (empty) offer list, because
    # the result drives the add/delete decisions below.
    response.raise_for_status()
    offers = json.loads(unicodedata.normalize('NFKD', response.text))
    return [
        PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
                   offer['ram_memory'],
                   offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
                   offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
                   offer['image_url'],
                   offer['offer_url'], offer['last_updated'], offer['is_validated'],
                   offer['offer_description'],
                   offer['offer_shop_code'])
        for offer in offers
    ]


def _parse_specifications(headings):
    """Extract the known specification fields from a product page's headings.

    Each heading is expected to look like "<label>: <value>". Fields whose
    label never appears, or whose value is the shop's "none" marker, are None.
    If a label appears more than once, the last occurrence wins.
    """
    specs = {
        'rom_memory': None,
        'ram_memory': None,
        'back_camera': None,
        'operating_system': None,
        'battery': None,
    }
    for heading in headings:
        text = heading.get_text()

        # rom memory
        if text.startswith("Меморија:"):
            value = text.split("Меморија:")[1].strip()
            specs['rom_memory'] = None if value in ("Нема", "/") else value

        # ram memory
        elif text.startswith("РАМ Меморија:"):
            value = text.split("РАМ Меморија:")[1].replace('RAM', '') \
                .replace('Ram', '').strip()
            specs['ram_memory'] = None if value in ("Нема", "/") else value

        # camera
        elif text.startswith("Камера:"):
            value = text.split("Камера:")[1].strip()
            specs['back_camera'] = None if value == "Нема" else value

        # operating system (only the part before the first comma is kept)
        elif text.startswith("Оперативен систем:"):
            value = text.split("Оперативен систем:")[1].split(",")[0].strip()
            specs['operating_system'] = None if value == "Нема" else value

        # battery
        elif text.startswith("Батерија:"):
            value = text.split("Батерија:")[1].strip()
            specs['battery'] = None if value == "Нема" else value
    return specs


def _scrape_offer(phone):
    """Build a PhoneOffer from one product box of the shop listing page."""
    title_link = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a')
    offer_url = title_link.get('href')  # url
    image_url = phone.find('div', {'class': 'jet-woo-product-thumbnail'}).find('img').get('src')  # image
    brand = phone.find_next('div', {'class': 'jet-woo-product-categories'}).find('a').get_text().strip()  # brand
    offer_name = title_link.get_text().strip()  # offer_name
    if brand not in offer_name:
        offer_name = brand + " " + offer_name

    # The last <bdi> inside the price box is the one used as the price.
    # NOTE(review): presumably that is the discounted price when an old and a
    # new price are both rendered - confirm against the live markup.
    price_tags = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi')
    price = int(float(price_tags[-1].get_text().replace("ден", "").replace(",", "").strip()))  # price

    # Detail pages are deliberately not status-checked: a broken product page
    # just leaves the specification fields as None instead of failing the run.
    detail_response = requests.get(offer_url, timeout=REQUEST_TIMEOUT)
    detail_soup = BeautifulSoup(detail_response.content, 'html.parser')
    specs = _parse_specifications(
        detail_soup.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'}))

    # Fields this shop's pages are not scraped for.
    color = None
    front_camera = None
    chipset = None
    cpu = None
    offer_description = None
    offer_shop_code = None

    return PhoneOffer(offer_shop, offer_name, price, specs['ram_memory'], specs['rom_memory'],
                      color, front_camera, specs['back_camera'], chipset, specs['battery'],
                      specs['operating_system'], cpu,
                      image_url,
                      offer_url, last_updated, is_validated, offer_description, offer_shop_code)


def _sync_offers(new_offers, database_offers):
    """Add new offers, update changed prices and delete vanished offers.

    Offers are matched by offer_name only.
    NOTE(review): stored names are NFKD-normalized when loaded while scraped
    names are not; names containing decomposable characters may never match -
    verify against real data.
    """
    stored_by_name = {}
    for stored in database_offers:
        stored_by_name.setdefault(stored.offer_name, []).append(stored)

    for new_offer in new_offers:
        matches = stored_by_name.get(new_offer.offer_name)
        if matches:
            print('ALREADY IN DATABASE')
            print(new_offer)
            # if it's already in database, check PRICE and if it's changed, change it
            stale = [match for match in matches if match.price != new_offer.price]
            if stale:
                # With several stored offers of the same name, the last one
                # whose price differs is the one updated.
                offer_id = stale[-1].offer_id
                print('PRICE CHANGED!')  # CHANGE PRICE
                print('offer id: ' + str(offer_id))
                requests.put(API_URL + '/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
                             headers=JSON_HEADERS, timeout=REQUEST_TIMEOUT)
        else:
            print('ADDED')  # ADD OFFER
            print(new_offer)
            requests.post(API_URL + '/addoffer',
                          headers=JSON_HEADERS, data=json.dumps(new_offer.__dict__, default=str),
                          timeout=REQUEST_TIMEOUT)

    print('------------------------------------')

    scraped_names = {new_offer.offer_name for new_offer in new_offers}
    for stored in database_offers:
        if stored.offer_name not in scraped_names:
            print('OFFER DELETED')
            print(stored)
            # DELETE OFFER
            requests.delete(API_URL + '/deleteoffer/' + str(stored.offer_id),
                            timeout=REQUEST_TIMEOUT)


try:
    mobitech_url = "https://mobitech.mk/shop/"

    listing_response = requests.get(mobitech_url, timeout=REQUEST_TIMEOUT)
    listing_response.raise_for_status()
    listing_soup = BeautifulSoup(listing_response.content, 'html.parser')
    phones = listing_soup.find_all('div', {'class': 'jet-woo-products__inner-box'})
    if not phones:
        # An empty listing would make the sync step delete every stored offer,
        # so treat it as a failed scrape (error page, changed markup, ...).
        raise RuntimeError('No products found on ' + mobitech_url)

    # Mobitech phone offers that are already in database
    database_offers = _load_database_offers()

    # All offers are scraped before anything is written, so a parsing error
    # aborts the run without leaving the database half-updated.
    new_offers = [_scrape_offer(phone) for phone in phones]

    _sync_offers(new_offers, database_offers)
except Exception:
    traceback.print_exc()
    _record_run_status('failed')
else:
    _record_run_status('success')
finally:
    # Release the database resources even if recording the status failed.
    cur.close()
    db_connection.close()
[b68ae8d]182
Note: See TracBrowser for help on using the repository browser.