source: phonelux_scrappers/scrappers/akcija_scrapper.py@ b68ae8d

Last change on this file since b68ae8d was b68ae8d, checked in by Marko <Marko@…>, 22 months ago

Created spring app, edited setec_scrapper

  • Property mode set to 100644
File size: 2.8 KB
Line 
1from datetime import datetime
2
3import psycopg2
4import config_read
5from bs4 import BeautifulSoup
6import requests
7import unicodedata
8import sys
9
10# file_path = '../outputfile.txt'
11# sys.stdout = open(file_path, "w")
12
# Scrape mobile-phone offers from akcija.com.mk and store one row per offer in
# the phone_offers table.
#
# Flow: read the database configuration, open a PostgreSQL connection, fetch
# the listing pages of the "mobilnitelefoni" category, open every offer's
# detail page to collect its specification text, and insert the offer.

# Read the configuration file and connect to the database.
cinfo = config_read.get_databaseconfig("../postgresdb.config")
db_connection = psycopg2.connect(
    database=cinfo[0],
    host=cinfo[1],
    user=cinfo[2],
    password=cinfo[3]
)
cur = db_connection.cursor()

offer_shop = "Akcija"  # offer shop
last_updated = datetime.now().date()
is_validated = False

# Seconds to wait for the server before giving up; without a timeout a single
# stalled connection would block the whole run indefinitely.
request_timeout = 30

# The statement is the same for every offer, so it is built once up front.
insert_script = 'INSERT INTO phone_offers (offer_shop, brand,' \
                ' offer_name, price, image_url, offer_url, last_updated, is_validated, offer_description) ' \
                'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);'

try:
    # Listing URLs are requested for the values 0 and 20 (presumably an item
    # offset with 20 offers per page -- confirm against the site).
    for i in range(0, 21, 20):
        akcija_url = "https://akcija.com.mk/listing/" + str(i) + "?category=mobilnitelefoni"
        response1 = requests.get(akcija_url, timeout=request_timeout)
        response1.encoding = 'utf-8'
        soup1 = BeautifulSoup(response1.text, 'html.parser')

        # attrs must be a dict; a set literal such as {'class', 'x'} is
        # interpreted as a list of alternative class values instead.
        phones = soup1.find_all('div', {'class': 'product-item__body pb-xl-2'})

        for phone in phones:
            title_link = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a')

            # Drop the generic "Паметен телефон" (smartphone) label from the title.
            offer_name = title_link.get_text().replace('Паметен телефон', '').strip()
            # The first word of the cleaned title is used as the brand, so the
            # offer name always contains it already.
            brand = offer_name.split(' ')[0]

            offer_url = title_link.get('href')
            image_url = phone.find('div', {'class': 'mb-2'}).find('img').get('src')

            # The text of <ins> up to the first space is parsed as the price.
            price_text = phone.find('div', {'class': 'flex-center-between mb-1 pt-xl-2'}) \
                .find('ins').get_text()
            price = int(price_text.split(' ')[0].strip())

            # The specifications are only available on the offer's own page.
            response2 = requests.get(offer_url, timeout=request_timeout)
            response2.encoding = 'utf-8'
            soup2 = BeautifulSoup(response2.text, 'html.parser')

            details_container = soup2.find('main', {'id': 'content'}) \
                .find_all('div', {'class': 'container'})[1]
            specifications = details_container.find('div', {'class': 'mb-14'}) \
                .find('div', {'class': 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}) \
                .find_all('p')

            description_lines = []
            for specification in specifications:
                spec_text = specification.get_text(separator='\n')
                # Skip the paragraph holding the order code ("Код за нарачка").
                if 'Код за нарачка' in spec_text.replace('NBSP', '').strip():
                    continue
                # NFKD folds compatibility characters into their plain forms.
                description_lines.append(
                    unicodedata.normalize('NFKD', spec_text.strip()) + "\n")
            offer_description = ''.join(description_lines)

            insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url,
                            last_updated, is_validated, offer_description)
            cur.execute(insert_script, insert_value)
            # Commit per offer so rows stored before a later failure are kept.
            db_connection.commit()
finally:
    # Release the database resources even when scraping fails part-way.
    cur.close()
    db_connection.close()
Note: See TracBrowser for help on using the repository browser.