source: phonelux_scrappers/scrappers/akcija_scrapper.py

Last change on this file was 47f4eaf, checked in by Marko <Marko@…>, 20 months ago

Final features implemented

  • Property mode set to 100644
File size: 6.3 KB
Line 
1import json
2import traceback
3from datetime import datetime
4
5import psycopg2
6import config_read
7from bs4 import BeautifulSoup
8import requests
9import unicodedata
10import sys
11from classes.phoneoffer import PhoneOffer
12
# Everything this script prints is redirected into a text file.
file_path = 'outputfile.txt'
sys.stdout = open(file_path, "w")


# Values shared by every offer scraped during this run.
offer_shop = "Akcija"  # offer shop
last_updated = datetime.now().date()
is_validated = False

# Read the database configuration file and open the connection; the
# configuration values are used by position (name, host, user, password).
cinfo = config_read.get_databaseconfig("../postgresdb.config")
connection_settings = {
    'database': cinfo[0],
    'host': cinfo[1],
    'user': cinfo[2],
    'password': cinfo[3],
}
db_connection = psycopg2.connect(**connection_settings)
cur = db_connection.cursor()
30
try:
    # Akcija phone offers that are already in the database, fetched from the
    # local backend; the response text is NFKD-normalized before decoding.
    offers = json.loads(unicodedata.normalize(
        'NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text))

    # JSON keys of a stored offer, in the positional order they are handed
    # to PhoneOffer (stored offers carry their id as the first argument).
    stored_offer_fields = (
        'id', 'offer_shop', 'offer_name', 'price', 'ram_memory', 'rom_memory',
        'color', 'front_camera', 'back_camera', 'chipset', 'battery',
        'operating_system', 'cpu', 'image_url', 'offer_url', 'last_updated',
        'is_validated', 'offer_description', 'offer_shop_code',
    )
    database_offers = [
        PhoneOffer(*[offer[field] for field in stored_offer_fields])
        for offer in offers
    ]

    new_offers = []

    # Listing pages are requested for the path values 0 and 20
    # (presumably a paging offset -- verify against the site).
    for page_offset in (0, 20):
        akcija_url = "https://akcija.com.mk/listing/" + str(page_offset) + "?category=mobilnitelefoni"
        response1 = requests.get(akcija_url)
        response1.encoding = 'utf-8'
        soup1 = BeautifulSoup(response1.text, 'html.parser')

        # Attribute filters are dicts ({'class': ...}); a set literal here
        # would also match elements whose class is literally "class".
        phones = soup1.find_all('div', {'class': 'product-item__body pb-xl-2'})

        for phone in phones:
            title_link = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a')
            offer_name = title_link.get_text().replace('Паметен телефон', '').strip()
            offer_url = title_link.get('href')
            image_url = phone.find('div', {'class': 'mb-2'}).find('img').get('src')
            # The price is the first space-separated token of the <ins> text.
            price = int(phone.find('div', {'class': 'flex-center-between mb-1 pt-xl-2'})
                        .find('ins').get_text().split(' ')[0].strip())

            # The offer's own page supplies the free-text description.
            response2 = requests.get(offer_url)
            response2.encoding = 'utf-8'
            soup2 = BeautifulSoup(response2.text, 'html.parser')

            # This scraper does not extract structured specifications;
            # these fields are always passed to PhoneOffer as None.
            back_camera = operating_system = chipset = battery = None
            ram_memory = rom_memory = cpu = front_camera = None
            color = offer_shop_code = None

            specifications = soup2.find('main', {'id': 'content'}) \
                .find_all('div', {'class': 'container'})[1] \
                .find('div', {'class': 'mb-14'}) \
                .find('div', {'class': 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}) \
                .find_all('p')

            description_lines = []
            for specification in specifications:
                spec_text = specification.get_text(separator='\n')
                # Skip the paragraph that holds the order code.
                # NOTE(review): replace('NBSP', '') removes the literal text
                # "NBSP", not the U+00A0 character -- confirm the intent.
                if 'Код за нарачка' in spec_text.replace('NBSP', '').strip():
                    continue
                description_lines.append(
                    unicodedata.normalize('NFKD', spec_text.strip()) + "\n")
            offer_description = ''.join(description_lines)

            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
                                         color, front_camera, back_camera, chipset, battery,
                                         operating_system, cpu, image_url, offer_url,
                                         last_updated, is_validated, offer_description,
                                         offer_shop_code))

    json_headers = {'Content-type': 'application/json'}

    # NOTE(review): stored names come from NFKD-normalized JSON while scraped
    # names are compared as-is; names containing characters that NFKD
    # rewrites may never match -- confirm whether that occurs in practice.
    for new_offer in new_offers:
        same_name = [old_offer for old_offer in database_offers
                     if new_offer.offer_name == old_offer.offer_name]

        if not same_name:
            print('ADDED')  # ADD OFFER
            print(new_offer)
            requests.post('http://localhost:8080/phoneoffer/addoffer',
                          headers=json_headers,
                          data=json.dumps(new_offer.__dict__, default=str))
            continue

        # Already in the database: update the price only if it changed.
        repriced = [old_offer for old_offer in same_name
                    if new_offer.price != old_offer.price]
        if repriced:
            # When several stored offers qualify, the last one's id is used.
            offer_id = repriced[-1].offer_id
            print('PRICE CHANGED!')  # CHANGE PRICE
            print('offer id: ' + str(offer_id))
            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id)
                         + '/changeprice/' + str(new_offer.price),
                         headers=json_headers)

    print('------------------------------------')

    # Stored offers whose name was not scraped in this run are deleted.
    scraped_names = {new_offer.offer_name for new_offer in new_offers}
    for old_offer in database_offers:
        if old_offer.offer_name not in scraped_names:
            print('OFFER DELETED')
            print(old_offer)
            # DELETE OFFER
            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
except Exception:
    # Top-level boundary: report the failure and still record the run below.
    traceback.print_exc()
    run_status = 'failed'
else:
    run_status = 'success'

# Record the outcome of this run; the cursor and connection are closed even
# if the insert or commit itself fails.
try:
    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
                    ' VALUES (%s, %s, %s);'
    insert_value = (offer_shop, last_updated, run_status)
    cur.execute(insert_script, insert_value)
    db_connection.commit()
finally:
    cur.close()
    db_connection.close()
164
Note: See TracBrowser for help on using the repository browser.