source: phonelux_scrappers/scrappers/mobilezone_scrapper.py@ b68ae8d

Last change on this file since b68ae8d was b68ae8d, checked in by Marko <Marko@…>, 22 months ago

Created spring app, edited setec_scrapper

  • Property mode set to 100644
File size: 3.8 KB
Line 
1import unicodedata
2from datetime import datetime
3import psycopg2
4import config_read
5from bs4 import BeautifulSoup
6from selenium import webdriver
7import requests
8
9import sys
10
# Everything this script prints is redirected into a text file. The encoding
# is pinned to UTF-8 because the scraped pages contain Cyrillic text; with the
# platform-default encoding a print of such text can raise UnicodeEncodeError
# on systems whose locale is not UTF-8.
file_path = 'outputfile.txt'
sys.stdout = open(file_path, "w", encoding="utf-8")

# Read the database settings and connect to PostgreSQL. The returned sequence
# is used positionally: database name, host, user, password.
# NOTE(review): the config path is relative, so presumably the script has to be
# started from its own directory - confirm against config_read.
cinfo = config_read.get_databaseconfig("../postgresdb.config")
db_connection = psycopg2.connect(
    database=cinfo[0],
    host=cinfo[1],
    user=cinfo[2],
    password=cinfo[3]
)
cur = db_connection.cursor()

# Values written unchanged into every offer row inserted by this run.
offer_shop = "Mobile Zone"  # shop name stored in the offer_shop column
last_updated = datetime.now().date()  # date on which this run started
is_validated = False  # stored in the is_validated column of each new row
27
# requests.get() waits indefinitely unless a timeout is passed, so one stalled
# connection would hang the whole run.
REQUEST_TIMEOUT = 30  # seconds

# Label looked for in a specification row's <th> -> offer field filled from
# the <td> of the same row.
SPECIFICATION_FIELDS = {
    'Главна камера': 'back_camera',
    'Селфи камера': 'front_camera',
    'Батерија': 'battery',
    'Меморија': 'rom_memory',
    'Боја': 'color',
}

# The statement is the same for every offer, so it is built once.
insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name , price, offer_url, image_url, ' \
                'rom_memory, battery, color, front_camera, back_camera, last_updated, is_validated)' \
                ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'


def _fetch_soup(url):
    """Download *url* and return the parsed page, or None if the request fails.

    A timeout is always applied and HTTP error statuses count as failures, so
    an error page is never parsed as if it were real content. Failures are
    reported with print() and left to the caller to skip.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as error:
        print('Request failed for {}: {}'.format(url, error))
        return None
    # Pass the raw bytes so BeautifulSoup works out the page encoding itself
    # instead of relying on the encoding requests guesses from the headers.
    return BeautifulSoup(response.content, 'html.parser')


def _parse_listing_entry(phone):
    """Extract the offer data shown on one <li> of a listing page.

    Returns a dict with the keys brand, offer_name, price, offer_url and
    image_url, or None when the entry has no product link, no title link or
    no readable price. find_all('li') also returns nested list items that are
    not products, and those end up here too.
    """
    link = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'})
    title = phone.find('h2', {'class': 'woocommerce-loop-product__title'})
    # NOTE(review): this takes the first price span of the entry. If the shop
    # renders a discounted product as old price followed by new price, this
    # would be the old one - confirm against the live markup.
    amount = phone.find('span', {'class': 'woocommerce-Price-amount amount'})

    title_link = title.find('a') if title is not None else None
    price_cell = amount.find('bdi') if amount is not None else None
    if link is None or title_link is None or price_cell is None:
        return None

    offer_url = link.get('href')
    if not offer_url:
        return None

    try:
        price = int(unicodedata.normalize(
            'NFKD', price_cell.get_text().replace(',', '').replace('ден', '').strip()))
    except ValueError:
        return None

    image = link.find('img')
    image_url = image.get('data-lazy-src') if image is not None else None

    # The brand link is optional; every missing level simply yields no brand.
    inner = phone.find('div', {'class': 'rey-productInner'})
    brand_section = inner.find('div', {'class': 'rey-brandLink'}) if inner is not None else None
    brand_link = brand_section.find('a') if brand_section is not None else None
    brand = brand_link.get_text().strip() if brand_link is not None else None

    offer_name = title_link.get_text().strip()
    if brand is not None and brand not in offer_name:
        # Make sure the stored name always mentions the brand.
        offer_name = brand + ' ' + offer_name

    return {
        'brand': brand,
        'offer_name': offer_name,
        'price': price,
        'offer_url': offer_url,
        'image_url': image_url,
    }


def _scrape_specifications(offer_url):
    """Read the attribute table of one offer page.

    Returns a dict holding every field named in SPECIFICATION_FIELDS. A field
    stays None when its row is absent, when the page has no attribute table,
    or when the page could not be downloaded.
    """
    specifications = dict.fromkeys(SPECIFICATION_FIELDS.values())

    soup2 = _fetch_soup(offer_url)
    if soup2 is None:
        return specifications

    table = soup2.find('table', {'class': 'woocommerce-product-attributes shop_attributes'})
    if table is None:
        return specifications

    for row in table.find_all('tr'):
        heading_cell = row.find('th')
        value_cell = row.find('td')
        if heading_cell is None or value_cell is None:
            continue
        heading = heading_cell.get_text()
        for label, field in SPECIFICATION_FIELDS.items():
            if label in heading:
                specifications[field] = value_cell.get_text().strip()

    return specifications


# Walk the first two listing pages and store every offer found on them. The
# cursor and the connection are released even when the run is interrupted by
# an error.
try:
    for i in range(1, 3):
        mobilezone_url = 'https://mobilezone.mk/produkt-kategorija/telefoni/novi-telefoni/page/' + str(i) + '/'

        soup1 = _fetch_soup(mobilezone_url)
        if soup1 is None:
            continue

        product_list = soup1.find('ul', {
            'class': 'products columns-tablet-2 columns-mobile-2 --skin-proto rey-wcGap-default rey-wcGrid-default '
                     '--paginated columns-4'})
        if product_list is None:
            print('No product list found on ' + mobilezone_url)
            continue

        for phone in product_list.find_all('li'):
            offer = _parse_listing_entry(phone)
            if offer is None:
                print('Skipped a list entry without link, title or price on ' + mobilezone_url)
                continue

            specifications = _scrape_specifications(offer['offer_url'])

            insert_value = (offer_shop, offer['brand'], offer['offer_name'], offer['price'],
                            offer['offer_url'], offer['image_url'], specifications['rom_memory'],
                            specifications['battery'], specifications['color'],
                            specifications['front_camera'], specifications['back_camera'],
                            last_updated, is_validated)
            cur.execute(insert_script, insert_value)
            # Commit per offer so rows stored so far survive a later failure.
            db_connection.commit()
finally:
    cur.close()
    db_connection.close()
Note: See TracBrowser for help on using the repository browser.