source: phonelux_scrappers/scrappers/mobigo_scrapper.py@ dfd5d87

Last change on this file since dfd5d87 was b68ae8d, checked in by Marko <Marko@…>, 23 months ago

Created spring app, edited setec_scrapper

  • Property mode set to 100644
File size: 4.8 KB
Line 
1from datetime import datetime
2
3import psycopg2
4import config_read
5from bs4 import BeautifulSoup
6import requests
7
8# import sys
9#
10# file_path = 'outputfile.txt'
11# sys.stdout = open(file_path, "w")
12
# Scrapes phone offers from the first listing pages of mobigo.mk and stores
# one row per offer in the phone_offers table.

# Seconds to wait for the shop to answer before a request is abandoned;
# without a timeout a stalled connection would hang the scraper forever.
REQUEST_TIMEOUT = 30

# Listing pages to visit (https://mobigo.mk/page/1/ .. /page/5/).
LISTING_PAGES = range(1, 6)

# Label in the first cell of a specification row -> phone_offers column that
# receives the value of that row.
SPEC_LABEL_TO_FIELD = {
    "Платформа": "operating_system",
    "Chipset": "chipset",
    "Главна Камера": "back_camera",
    "Селфи Камера": "front_camera",
    "Батерија": "battery",
}

# Label of the row that carries both ROM and RAM in a single value.
MEMORY_LABEL = "Меморија"

# Value the shop prints when a specification is not available.
MISSING_VALUE = "/"


def _fetch_soup(url):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    Raises a ``requests`` exception on timeout, connection failure or an
    HTTP error status, so an error page is never parsed as a listing.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def _parse_price(price_text):
    """Return the price as an int after removing 'ден.' and all dots."""
    return int(float(price_text.replace('ден.', '').replace('.', '').strip()))


def _parse_specifications(rows):
    """Collect the specification fields from the ``<tr>`` rows of an offer.

    Returns a dict keyed by phone_offers column name. A field stays ``""``
    when no row with its label exists and becomes ``None`` when the row is
    present but its value is "/" (or the value cell is missing).
    """
    specs = {
        "ram_memory": "",
        "rom_memory": "",
        "battery": "",
        "back_camera": "",
        "front_camera": "",
        "chipset": "",
        "operating_system": "",
    }

    for row in rows:
        label_cell = row.find('td')
        if label_cell is None:
            continue

        label = label_cell.get_text()
        if label != MEMORY_LABEL and label not in SPEC_LABEL_TO_FIELD:
            continue

        # The value of a row is rendered inside an <i> element; a row without
        # one is treated the same as an explicit "/".
        value_cell = row.find('i')
        value = value_cell.get_text().strip() if value_cell is not None else MISSING_VALUE
        if value == MISSING_VALUE:
            value = None

        if label == MEMORY_LABEL:
            # The first whitespace-separated token is stored as ROM and the
            # second as RAM; commas are dropped first. Tokens that are absent
            # are stored as None instead of raising IndexError.
            parts = value.replace(',', '').split() if value is not None else []
            specs["rom_memory"] = parts[0] if parts else None
            specs["ram_memory"] = parts[1] if len(parts) > 1 else None
        else:
            specs[SPEC_LABEL_TO_FIELD[label]] = value

    return specs


def _scrape_listing_page(page_url):
    """Yield the scraped column values of every phone offer on a listing page.

    Each yielded tuple is ordered like the scraped columns of the INSERT:
    (brand, offer_name, price, image_url, offer_url, ram_memory, rom_memory,
    battery, back_camera, front_camera, chipset, operating_system).
    Offers whose name contains "Watch" or "Tab" are skipped.
    """
    listing = _fetch_soup(page_url)

    phone_sections = listing.find_all('ul', {'class': 'recent-posts'})
    if not phone_sections:
        # Nothing to scrape on this page (e.g. the page number does not exist).
        print("No offer list found on " + page_url + ", skipping page")
        return

    # The offers are taken from the last 'recent-posts' list on the page.
    for phone in phone_sections[-1].find_all('li'):
        thumb_link = phone.find('div', {'class': 'post-thumb'}).find('a')
        offer_url = thumb_link.get('href')  # offer url
        image_url = thumb_link.find('img').get('src')  # image url

        # First <h2> holds the offer name, second <h2> holds the price.
        headings = phone.find('div', {'class': 'post-content'}).find_all('h2')
        offer_name = headings[0].get_text().strip()

        # if the product is watch or tablet, continue
        if "Watch" in offer_name or "Tab" in offer_name:
            continue

        price = _parse_price(headings[1].get_text())

        offer_page = _fetch_soup(offer_url)

        brand = offer_page.find('a', {'rel': 'category tag'}).get_text().strip()
        if brand not in offer_name:
            offer_name = brand + " " + offer_name

        # An offer page without the specification table is stored with empty
        # specification fields, exactly like a table that lacks those rows.
        spec_table = offer_page.find('table', {'id': 'singlet'})
        spec_rows = spec_table.find_all('tr') if spec_table is not None else []
        specs = _parse_specifications(spec_rows)

        yield (
            brand, offer_name, price, image_url, offer_url,
            specs["ram_memory"], specs["rom_memory"], specs["battery"],
            specs["back_camera"], specs["front_camera"], specs["chipset"],
            specs["operating_system"],
        )


# Call to read the configuration file and connect to database
cinfo = config_read.get_databaseconfig("../postgresdb.config")
db_connection = psycopg2.connect(
    database=cinfo[0],
    host=cinfo[1],
    user=cinfo[2],
    password=cinfo[3]
)
cur = db_connection.cursor()

offer_shop = "Mobi Go"  # offer shop
last_updated = datetime.now().date()
is_validated = False

insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory,' \
                ' rom_memory, battery, back_camera, front_camera, chipset, operating_system, last_updated, is_validated)' \
                ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'

try:
    for i in LISTING_PAGES:
        mobigo_url = "https://mobigo.mk/page/" + str(i) + "/"

        for scraped_values in _scrape_listing_page(mobigo_url):
            insert_value = (offer_shop,) + scraped_values + (last_updated, is_validated)
            cur.execute(insert_script, insert_value)
            # Commit per offer so rows already scraped survive a later failure.
            db_connection.commit()
finally:
    # Release the database resources even when scraping fails part-way.
    cur.close()
    db_connection.close()
Note: See TracBrowser for help on using the repository browser.