source: phonelux_scrappers/scrappers/a1_scrapper.py@ e5b84dc

Last change on this file since e5b84dc was b68ae8d, checked in by Marko <Marko@…>, 2 years ago

Created spring app, edited setec_scrapper

  • Property mode set to 100644
File size: 4.2 KB
RevLine 
[b68ae8d]1import unicodedata
2from datetime import datetime
3
4import psycopg2
5import config_read
6from bs4 import BeautifulSoup
7import requests
8
9import sys
10
# Everything this script prints goes to a text file instead of the console.
file_path = "outputfile.txt"
sys.stdout = open(file_path, mode="w")

# Read the database settings and open the PostgreSQL connection used for the
# inserts below. The config reader's result is consumed positionally:
# database name, host, user, password.
cinfo = config_read.get_databaseconfig("../postgresdb.config")
connection_settings = {
    "database": cinfo[0],
    "host": cinfo[1],
    "user": cinfo[2],
    "password": cinfo[3],
}
db_connection = psycopg2.connect(**connection_settings)
cur = db_connection.cursor()
23
# Values that are identical for every offer row written during this run.
offer_shop = "A1"  # offer shop
last_updated = datetime.now().date()
is_validated = False

a1_url = 'https://www.a1.mk/webshop/mk/phones'

# Without a timeout requests can block forever on a stalled connection, and
# without the status check an HTTP error page would be parsed as if it were
# the phone listing and fail later with an unrelated AttributeError.
response1 = requests.get(a1_url, timeout=30)
response1.raise_for_status()
soup1 = BeautifulSoup(response1.content, 'html.parser')

# The attribute filters must be dicts ({'class': value}). The previous set
# literals ({'class', value}) were interpreted by BeautifulSoup as a list of
# alternative class names, so they also matched a CSS class literally named
# "class".
phones = (
    soup1.find('main', {'class': 'gsm-advisor-grid phones'})
    .find('div', {'class': 'd-flex'})
    .find_all('div', {'class': 'dvc-idtfr by4'})
)
35
# Labels looked for in the specification table of a phone's detail page,
# mapped to the phone_offers column each one fills. Every label is tested
# against every row, and a later matching row overwrites an earlier one.
spec_labels = {
    'Камера': 'back_camera',
    'Оперативен систем': 'operating_system',
    'CPU': 'cpu',
    'Вградена меморија': 'rom_memory',
    'RAM меморија': 'ram_memory',
    'Батерија': 'battery',
    'Предна камера': 'front_camera',
}

# The statement is the same for every offer, so it is built once up front.
insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \
                'ram_memory, rom_memory, battery, back_camera, front_camera, color, cpu, ' \
                'operating_system, offer_shop_code, last_updated, is_validated)' \
                ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'

# For every phone tile on the listing page: read the tile's data attributes,
# fetch the phone's detail page, extract price, colors and specifications,
# and store one row in phone_offers.
for phone in phones:
    brand = phone.get('data-brand').strip()
    offer_name = brand + " " + phone.get('data-model').strip()

    offer_shop_code = phone.get('data-productid').strip()
    # attrs must be a dict; a set literal here is treated by BeautifulSoup as
    # alternative class names and would also match a class named "class".
    offer_url = phone.find('a', {'class': 'device-link'}).get('href')
    image_url = phone.get('data-image')

    # Bound the wait on a stalled connection and fail on HTTP errors instead
    # of parsing an error page as a product page.
    response2 = requests.get(offer_url, timeout=30)
    response2.raise_for_status()
    soup2 = BeautifulSoup(response2.content, 'html.parser')

    temp_prices = soup2.find('div', {'class': 'ured-tabs-content'}) \
        .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div')

    # offer price: taken from the last div mentioning the device-only price.
    price = None
    for temp_price in temp_prices:
        price_text = temp_price.get_text()
        if 'Цена само за уред' in price_text:
            # Drop the label, the 'Одбери' text, the currency word and the
            # '.' characters (presumably thousands separators) so that the
            # remainder parses as an integer.
            price = int(price_text.replace('Цена само за уред', '')
                        .replace('Одбери', '').replace('денари', '').replace('.', '').strip())

    colors_section = soup2.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label')

    # Labels without a data-content attribute yield None, which str.join
    # rejects with a TypeError, so they are left out.
    temp_colors = [
        color_name
        for color_name in (label.get('data-content') for label in colors_section)
        if color_name is not None
    ]
    color = ','.join(temp_colors)  # colors available for the offer

    # Extracted on every iteration as before; the value is not part of the
    # inserted row.
    phone_description = soup2.find('div', {'class': 'desc section'}).find('p').get_text().strip()

    table_rows = soup2.find('table', {'class': 'table karakteristiki'}).find_all('tr')

    # Every specification starts as None and stays None when its label is
    # not present in the table.
    specs = dict.fromkeys(spec_labels.values())
    for row in table_rows:
        row_text = row.get_text()  # computed once per row
        for label, column in spec_labels.items():
            if label in row_text:
                specs[column] = row_text.replace(label, '').strip()

    insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url,
                    specs['ram_memory'], specs['rom_memory'], specs['battery'],
                    specs['back_camera'], specs['front_camera'], color, specs['cpu'],
                    specs['operating_system'], offer_shop_code, last_updated, is_validated)
    cur.execute(insert_script, insert_value)
    # Commit after each offer so rows already scraped are kept if a later
    # phone fails.
    db_connection.commit()
111
# Release the database resources: the cursor first, then its connection.
for db_resource in (cur, db_connection):
    db_resource.close()
Note: See TracBrowser for help on using the repository browser.