Ignore:
Timestamp:
10/01/22 22:55:27 (21 months ago)
Author:
Marko <Marko@…>
Branches:
master
Children:
fd5b100
Parents:
48f3030
Message:

Refactored code

Location:
phonelux_scrappers/scrappers
Files:
1 deleted
12 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/a1_scrapper.py

    r48f3030 r895cd87  
    11import unicodedata
    22from datetime import datetime
    3 
     3import json
    44import psycopg2
    55import config_read
    66from bs4 import BeautifulSoup
    77import requests
     8import sys
     9import unicodedata
    810
    9 import sys
     11from classes.phoneoffer import PhoneOffer
    1012
    1113file_path = 'outputfile.txt'
    1214sys.stdout = open(file_path, "w")
    1315
    14 # Call to read the configuration file and connect to database
    15 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    16 db_connection = psycopg2.connect(
    17     database=cinfo[0],
    18     host=cinfo[1],
    19     user=cinfo[2],
    20     password=cinfo[3]
    21 )
    22 cur = db_connection.cursor()
    23 
    2416offer_shop = "A1"  # offer shop
    2517last_updated = datetime.now().date()
    2618is_validated = False
     19
     20# A1 phone offers that are already in database
     21
     22offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text))
     23
     24database_offers = []
     25
     26for offer in offers:
     27    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     28                            offer['ram_memory'],
     29                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     30                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     31                            offer['image_url'],
     32                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     33                            offer['offer_description'],
     34                            offer['offer_shop_code'])
     35    database_offers.append(phoneOffer)
    2736
    2837a1_url = 'https://www.a1.mk/webshop/mk/phones'
     
    3443    .find_all('div', {'class', 'dvc-idtfr by4'})
    3544
     45new_offers = []
     46
    3647for phone in phones:
    3748    brand = phone.get('data-brand').strip()
    38     offer_name = brand+" "+phone.get('data-model').strip()
     49    offer_name = brand + " " + phone.get('data-model').strip()
    3950
    4051    # if brand not in offer_name:
     
    7788    battery = None
    7889    front_camera = None
     90    chipset = None
     91    offer_description = None
    7992
    8093    for row in table_rows:
     
    100113            front_camera = row.get_text().replace('Предна камера', '').strip()
    101114
    102     insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \
    103                     'ram_memory, rom_memory, battery, back_camera, front_camera, color, cpu, ' \
    104                     'operating_system, offer_shop_code, last_updated, is_validated)' \
    105                     ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    106     insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory, rom_memory,
    107                     battery, back_camera, front_camera, color, cpu, operating_system, offer_shop_code,
    108                     last_updated, is_validated)
    109     cur.execute(insert_script, insert_value)
    110     db_connection.commit()
     115    new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     116                                 color, front_camera, back_camera, chipset, battery, operating_system, cpu, image_url,
     117                                 offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    111118
    112 cur.close()
    113 db_connection.close()
     119for new_offer in new_offers:
     120    flag = False
     121    flag_price = False
     122    offer_id = None
     123
     124    for old_offer in database_offers:
     125
     126        if new_offer.offer_shop_code == old_offer.offer_shop_code:
     127            flag = True
     128            if new_offer.price != old_offer.price:
     129                flag_price = True
     130                offer_id = old_offer.offer_id
     131
     132    if flag:
     133        # print('ALREADY IN DATABASE')
     134        # print(new_offer)
     135        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     136        if flag_price:
     137            print('PRICE CHANGED!')  # CHANGE PRICE
     138            print('offer id: ' + str(offer_id))
     139            headers = {'Content-type': 'application/json'}
     140            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     141                         headers=headers)
     142    else:
     143        print('ADDED')  # ADD OFFER
     144        print(new_offer)
     145        headers = {'Content-type': 'application/json'}
     146        requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers, data=json.dumps(new_offer.__dict__,
     147                                                                                                    default=str))
     148
     149print('------------------------------------')
     150
     151for old_offer in database_offers:
     152    flag = False
     153    for new_offer in new_offers:
     154        if old_offer.offer_shop_code == new_offer.offer_shop_code:
     155            flag = True
     156
     157    if not flag:
     158        print('OFFER DELETED')
     159        print(old_offer)
     160        # DELETE OFFER
     161        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
  • phonelux_scrappers/scrappers/akcija_scrapper.py

    r48f3030 r895cd87  
     1import json
    12from datetime import datetime
    23
     
    78import unicodedata
    89import sys
     10from classes.phoneoffer import PhoneOffer
    911
    10 # file_path = '../outputfile.txt'
    11 # sys.stdout = open(file_path, "w")
     12file_path = 'outputfile.txt'
     13sys.stdout = open(file_path, "w")
    1214
    13 # Call to read the configuration file and connect to database
    14 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    15 db_connection = psycopg2.connect(
    16     database=cinfo[0],
    17     host=cinfo[1],
    18     user=cinfo[2],
    19     password=cinfo[3]
    20 )
    21 cur = db_connection.cursor()
    2215
    2316offer_shop = "Akcija"  # offer shop
    2417last_updated = datetime.now().date()
    2518is_validated = False
     19
     20# Akcija phone offers that are already in database
     21
     22offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text))
     23
     24database_offers = []
     25
     26for offer in offers:
     27    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     28                            offer['ram_memory'],
     29                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     30                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     31                            offer['image_url'],
     32                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     33                            offer['offer_description'],
     34                            offer['offer_shop_code'])
     35    database_offers.append(phoneOffer)
     36
     37new_offers = []
    2638
    2739i = 0
     
    5163        soup2 = BeautifulSoup(response2.text, 'html.parser')
    5264
     65        back_camera = None
     66        operating_system = None
     67        chipset = None
     68        battery = None
     69        ram_memory = None
     70        rom_memory = None
     71        cpu = None
     72        front_camera = None
     73        color = None
     74        offer_shop_code = None
     75
    5376        specifications = soup2.find('main', {'id': 'content'}) \
    5477            .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \
     
    6285                                                       str(specification.get_text(separator='\n').strip())) + "\n"
    6386
    64         insert_script = 'INSERT INTO phone_offers (offer_shop, brand,' \
    65                         ' offer_name, price, image_url, offer_url, last_updated, is_validated, offer_description) ' \
    66                         'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);'
    67         insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url,
    68                         last_updated, is_validated, offer_description)
    69         cur.execute(insert_script, insert_value)
    70         db_connection.commit()
     87        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     88                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     89                                     image_url,
     90                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    7191    i += 20
    7292
    73 cur.close()
    74 db_connection.close()
     93for new_offer in new_offers:
     94    flag = False
     95    flag_price = False
     96    offer_id = None
     97
     98    for old_offer in database_offers:
     99
     100        if new_offer.offer_name == old_offer.offer_name:
     101            flag = True
     102            if new_offer.price != old_offer.price:
     103                flag_price = True
     104                offer_id = old_offer.offer_id
     105
     106    if flag:
     107        # print('ALREADY IN DATABASE')
     108        # print(new_offer)
     109        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     110        if flag_price:
     111            print('PRICE CHANGED!')  # CHANGE PRICE
     112            print('offer id: ' + str(offer_id))
     113            headers = {'Content-type': 'application/json'}
     114            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     115                         headers=headers)
     116    else:
     117        print('ADDED')  # ADD OFFER
     118        print(new_offer)
     119        headers = {'Content-type': 'application/json'}
     120        requests.post('http://localhost:8080/phoneoffer/addoffer',
     121                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     122
     123print('------------------------------------')
     124
     125for old_offer in database_offers:
     126    flag = False
     127    for new_offer in new_offers:
     128        if old_offer.offer_name == new_offer.offer_name:
     129            flag = True
     130
     131    if not flag:
     132        print('OFFER DELETED')
     133        print(old_offer)
     134        # DELETE OFFER
     135        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
  • phonelux_scrappers/scrappers/handy_scrapper.py

    r48f3030 r895cd87  
     1import json
    12import unicodedata
    23from datetime import datetime
     
    910import sys
    1011
     12from classes.phoneoffer import PhoneOffer
     13
    1114file_path = 'outputfile.txt'
    1215sys.stdout = open(file_path, "w")
    1316
    14 # Call to read the configuration file and connect to database
    15 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    16 db_connection = psycopg2.connect(
    17     database=cinfo[0],
    18     host=cinfo[1],
    19     user=cinfo[2],
    20     password=cinfo[3]
    21 )
    22 cur = db_connection.cursor()
    2317
    2418offer_shop = "Handy"  # offer shop
    2519last_updated = datetime.now().date()
    2620is_validated = False
     21
     22# Handy phone offers that are already in database
     23offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/handy').text))
     24
     25database_offers = []
     26
     27for offer in offers:
     28    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     29                            offer['ram_memory'],
     30                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     31                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     32                            offer['image_url'],
     33                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     34                            offer['offer_description'],
     35                            offer['offer_shop_code'])
     36    database_offers.append(phoneOffer)
     37
     38new_offers = []
    2739
    2840handy_url = 'https://www.handy.mk/telefoni?page=6'
     
    4456    soup2 = BeautifulSoup(response2.text, 'html.parser')
    4557
     58    back_camera = None
     59    operating_system = None
     60    chipset = None
     61    battery = None
     62    ram_memory = None
     63    rom_memory = None
     64    cpu = None
     65    front_camera = None
     66    offer_shop_code = None
     67    color = None
     68    image_url = None
     69
    4670    color_section = soup2.find('section', {'data-hook': 'product-colors-title-section'})
    47 
    48     color = None
    4971    if color_section is not None:
    50         temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase3563640754__container'})\
     72        temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase3548966286__container'})\
    5173            .find_all('input', {'type': 'radio'})
    5274        colors_list = []
     
    6789    offer_description = '\n'.join(specifications)
    6890
    69     insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name , price, offer_url, ' \
    70                     'offer_description, last_updated, is_validated)' \
    71                             ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s);'
    72     insert_value = (offer_shop, brand, offer_name, price, offer_url, offer_description,
    73                             last_updated, is_validated)
    74     cur.execute(insert_script, insert_value)
    75     db_connection.commit()
     91    new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     92                                 color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     93                                 image_url,
     94                                 offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    7695
    77 cur.close()
    78 db_connection.close()
     96for new_offer in new_offers:
     97    flag = False
     98    flag_price = False
     99    offer_id = None
     100
     101    for old_offer in database_offers:
     102
     103        if new_offer.offer_name == old_offer.offer_name:
     104            flag = True
     105            if new_offer.price != old_offer.price:
     106                flag_price = True
     107                offer_id = old_offer.offer_id
     108
     109    if flag:
     110        # print('ALREADY IN DATABASE')
     111        # print(new_offer)
     112        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     113        if flag_price:
     114            print('PRICE CHANGED!')  # CHANGE PRICE
     115            print('offer id: ' + str(offer_id))
     116            headers = {'Content-type': 'application/json'}
     117            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     118                         headers=headers)
     119    else:
     120        print('ADDED')  # ADD OFFER
     121        print(new_offer)
     122        headers = {'Content-type': 'application/json'}
     123        requests.post('http://localhost:8080/phoneoffer/addoffer',
     124                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     125
     126print('------------------------------------')
     127
     128for old_offer in database_offers:
     129    flag = False
     130    for new_offer in new_offers:
     131        if old_offer.offer_name == new_offer.offer_name:
     132            flag = True
     133
     134    if not flag:
     135        print('OFFER DELETED')
     136        print(old_offer)
     137        # DELETE OFFER
     138        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     139
     140
  • phonelux_scrappers/scrappers/ledikom_scrapper.py

    r48f3030 r895cd87  
     1import json
    12import unicodedata
    23from datetime import datetime
    3 
    44import psycopg2
    55import config_read
     
    1010import sys
    1111
     12from classes.phoneoffer import PhoneOffer
     13
    1214file_path = 'outputfile.txt'
    1315sys.stdout = open(file_path, "w")
    14 
    15 # Call to read the configuration file and connect to database
    16 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    17 db_connection = psycopg2.connect(
    18     database=cinfo[0],
    19     host=cinfo[1],
    20     user=cinfo[2],
    21     password=cinfo[3]
    22 )
    23 cur = db_connection.cursor()
    2416
    2517offer_shop = "Ledikom"  # offer shop
    2618last_updated = datetime.now().date()
    2719is_validated = False
     20
     21# Ledikom phone offers that are already in database
     22
     23offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/ledikom').text))
     24
     25database_offers = []
     26
     27for offer in offers:
     28    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     29                            offer['ram_memory'],
     30                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     31                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     32                            offer['image_url'],
     33                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     34                            offer['offer_description'],
     35                            offer['offer_shop_code'])
     36    database_offers.append(phoneOffer)
     37
     38new_offers = []
    2839
    2940ledikom_phone_urls = [
     
    6576        offer_name = ' '.join(temp_offer_name.split())
    6677        brand = offer_name.split(' ')[0]
    67         price = int(phone.find('span', {'class': 'price'}).get_text().replace('ден.', '').replace('.', '').strip())
     78        price = int(phone.find('span', {'class': 'price'}).get_text().replace('ден.', '')
     79                    .replace('ден', '')
     80                    .replace('.', '').strip())
    6881
    6982        driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
     
    8295        rom_memory = None
    8396        ram_memory = None
     97        back_camera = None
     98        operating_system = None
     99        chipset = None
     100        battery = None
     101        cpu = None
     102        front_camera = None
     103        offer_shop_code = None
     104        offer_description = None
    84105
    85106        if len(specifications) != 0:
     
    114135            color = temp
    115136
    116         insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \
    117                         'ram_memory, rom_memory, color, last_updated, is_validated)' \
    118                         ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    119         insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory,
    120                         rom_memory, color, last_updated, is_validated)
    121         cur.execute(insert_script, insert_value)
    122         db_connection.commit()
     137        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     138                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     139                                     image_url,
     140                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    123141
    124 cur.close()
    125 db_connection.close()
     142for new_offer in new_offers:
     143    flag = False
     144    flag_price = False
     145    offer_id = None
     146
     147    for old_offer in database_offers:
     148
     149        if new_offer.offer_name == old_offer.offer_name:
     150            flag = True
     151            if new_offer.price != old_offer.price:
     152                flag_price = True
     153                offer_id = old_offer.offer_id
     154
     155    if flag:
     156        # print('ALREADY IN DATABASE')
     157        # print(new_offer)
     158        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     159        if flag_price:
     160            print('PRICE CHANGED!')  # CHANGE PRICE
     161            print('offer id: ' + str(offer_id))
     162            headers = {'Content-type': 'application/json'}
     163            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     164                         headers=headers)
     165    else:
     166        print('ADDED')  # ADD OFFER
     167        print(new_offer)
     168        headers = {'Content-type': 'application/json'}
     169        requests.post('http://localhost:8080/phoneoffer/addoffer',
     170                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     171
     172print('------------------------------------')
     173
     174for old_offer in database_offers:
     175    flag = False
     176    for new_offer in new_offers:
     177        if old_offer.offer_name == new_offer.offer_name:
     178            flag = True
     179
     180    if not flag:
     181        print('OFFER DELETED')
     182        print(old_offer)
     183        # DELETE OFFER
     184        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
  • phonelux_scrappers/scrappers/mobelix_scrapper.py

    r48f3030 r895cd87  
     1import json
     2import sys
    13import unicodedata
    24from datetime import datetime
     
    810
    911# import sys
    10 #
    11 # file_path = 'outputfile.txt'
    12 # sys.stdout = open(file_path, "w")
     12from classes.phoneoffer import PhoneOffer
    1313
    14 # Call to read the configuration file and connect to database
    15 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    16 db_connection = psycopg2.connect(
    17     database=cinfo[0],
    18     host=cinfo[1],
    19     user=cinfo[2],
    20     password=cinfo[3]
    21 )
    22 cur = db_connection.cursor()
     14file_path = 'outputfile.txt'
     15sys.stdout = open(file_path, "w")
    2316
    2417offer_shop = "Mobelix"  # offer shop
    2518last_updated = datetime.now().date()
    2619is_validated = False
     20
     21# Mobelix phone offers that are already in database
     22
     23offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobelix').text))
     24
     25database_offers = []
     26
     27for offer in offers:
     28    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     29                            offer['ram_memory'],
     30                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     31                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     32                            offer['image_url'],
     33                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     34                            offer['offer_description'],
     35                            offer['offer_shop_code'])
     36    database_offers.append(phoneOffer)
     37
     38new_offers = []
    2739
    2840for i in range(1, 17):
     
    7789        back_camera = ''
    7890        cpu = None
     91        offer_shop_code = None
     92        offer_description = None
    7993
    8094        for table in tables:
     
    120134            back_camera = None
    121135
    122         insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \
    123                         'ram_memory, rom_memory, battery, back_camera, front_camera, color, cpu, chipset, ' \
    124                         'operating_system, last_updated, is_validated)' \
    125                         ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    126         insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory, rom_memory,
    127                         battery, back_camera, front_camera, color, cpu, chipset, operating_system,
    128                         last_updated, is_validated)
    129         cur.execute(insert_script, insert_value)
    130         db_connection.commit()
     136        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     137                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     138                                     image_url,
     139                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    131140
    132 cur.close()
    133 db_connection.close()
     141
     142for new_offer in new_offers:
     143    flag = False
     144    flag_price = False
     145    offer_id = None
     146
     147    for old_offer in database_offers:
     148
     149        if new_offer.offer_name == old_offer.offer_name:
     150            flag = True
     151            if new_offer.price != old_offer.price:
     152                flag_price = True
     153                offer_id = old_offer.offer_id
     154
     155    if flag:
     156        # print('ALREADY IN DATABASE')
     157        # print(new_offer)
     158        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     159        if flag_price:
     160            print('PRICE CHANGED!')  # CHANGE PRICE
     161            print('offer id: ' + str(offer_id))
     162            headers = {'Content-type': 'application/json'}
     163            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     164                         headers=headers)
     165    else:
     166        print('ADDED')  # ADD OFFER
     167        print(new_offer)
     168        headers = {'Content-type': 'application/json'}
     169        requests.post('http://localhost:8080/phoneoffer/addoffer',
     170                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     171
     172print('------------------------------------')
     173
     174for old_offer in database_offers:
     175    flag = False
     176    for new_offer in new_offers:
     177        if old_offer.offer_name == new_offer.offer_name:
     178            flag = True
     179
     180    if not flag:
     181        print('OFFER DELETED')
     182        print(old_offer)
     183        # DELETE OFFER
     184        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
  • phonelux_scrappers/scrappers/mobigo_scrapper.py

    r48f3030 r895cd87  
     1import json
     2import unicodedata
    13from datetime import datetime
    24
     
    57from bs4 import BeautifulSoup
    68import requests
     9import sys
    710
    8 # import sys
    9 #
    10 # file_path = 'outputfile.txt'
    11 # sys.stdout = open(file_path, "w")
     11from classes.phoneoffer import PhoneOffer
    1212
    13 # Call to read the configuration file and connect to database
    14 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    15 db_connection = psycopg2.connect(
    16     database=cinfo[0],
    17     host=cinfo[1],
    18     user=cinfo[2],
    19     password=cinfo[3]
    20 )
    21 cur = db_connection.cursor()
     13file_path = 'outputfile.txt'
     14sys.stdout = open(file_path, "w")
    2215
    2316offer_shop = "Mobi Go"  # offer shop
    2417last_updated = datetime.now().date()
    2518is_validated = False
     19
     20# Mobi Go phone offers that are already in database
     21
     22offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobigo').text))
     23
     24database_offers = []
     25
     26for offer in offers:
     27    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     28                            offer['ram_memory'],
     29                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     30                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     31                            offer['image_url'],
     32                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     33                            offer['offer_description'],
     34                            offer['offer_shop_code'])
     35    database_offers.append(phoneOffer)
     36
     37new_offers = []
     38
     39
    2640for i in range(1, 6):
    2741    mobigo_url = "https://mobigo.mk/page/" + str(i) + "/"
     
    5569        specifications = soup2.find('table', {'id': 'singlet'}).find_all('tr')
    5670
    57         ram_memory = ""
    58         rom_memory = ""
    59         battery = ""
    60         back_camera = ""
    61         front_camera = ""
    62         chipset = ""
    63         operating_system = ""
     71        ram_memory = None
     72        rom_memory = None
     73        battery = None
     74        back_camera = None
     75        front_camera = None
     76        chipset = None
     77        operating_system = None
     78        cpu = None
     79        offer_shop_code = None
     80        offer_description = None
     81        color = None
    6482
    6583        for specification in specifications:
     
    111129                    battery = None
    112130
    113         insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory,' \
    114                         ' rom_memory, battery, back_camera, front_camera, chipset, operating_system, last_updated, is_validated)' \
    115                         ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    116         insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory,
    117                         rom_memory, battery, back_camera, front_camera, chipset, operating_system, last_updated, is_validated)
    118         cur.execute(insert_script, insert_value)
    119         db_connection.commit()
     131        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     132                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     133                                     image_url,
     134                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    120135
    121 cur.close()
    122 db_connection.close()
     136
     137for new_offer in new_offers:
     138    flag = False
     139    flag_price = False
     140    offer_id = None
     141
     142    for old_offer in database_offers:
     143
     144        if new_offer.offer_name == old_offer.offer_name:
     145            flag = True
     146            if new_offer.price != old_offer.price:
     147                flag_price = True
     148                offer_id = old_offer.offer_id
     149
     150    if flag:
     151        print('ALREADY IN DATABASE')
     152        print(new_offer)
     153        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     154        if flag_price:
     155            print('PRICE CHANGED!')  # CHANGE PRICE
     156            print('offer id: ' + str(offer_id))
     157            headers = {'Content-type': 'application/json'}
     158            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     159                         headers=headers)
     160    else:
     161        print('ADDED')  # ADD OFFER
     162        print(new_offer)
     163        headers = {'Content-type': 'application/json'}
     164        requests.post('http://localhost:8080/phoneoffer/addoffer',
     165                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     166
     167print('------------------------------------')
     168
     169for old_offer in database_offers:
     170    flag = False
     171    for new_offer in new_offers:
     172        if old_offer.offer_name == new_offer.offer_name:
     173            flag = True
     174
     175    if not flag:
     176        print('OFFER DELETED')
     177        print(old_offer)
     178        # DELETE OFFER
     179        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
  • phonelux_scrappers/scrappers/mobilezone_scrapper.py

    r48f3030 r895cd87  
     1import json
    12import unicodedata
    23from datetime import datetime
     
    67from selenium import webdriver
    78import requests
     9import sys
    810
    9 import sys
     11from classes.phoneoffer import PhoneOffer
    1012
    1113file_path = 'outputfile.txt'
    1214sys.stdout = open(file_path, "w")
    1315
    14 # Call to read the configuration file and connect to database
    15 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    16 db_connection = psycopg2.connect(
    17     database=cinfo[0],
    18     host=cinfo[1],
    19     user=cinfo[2],
    20     password=cinfo[3]
    21 )
    22 cur = db_connection.cursor()
    23 
    2416offer_shop = "Mobile Zone"  # offer shop
    2517last_updated = datetime.now().date()
    2618is_validated = False
     19
     20# Mobile Zone phone offers that are already in database
     21
     22offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobilezone').text))
     23
     24database_offers = []
     25
     26for offer in offers:
     27    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     28                            offer['ram_memory'],
     29                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     30                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     31                            offer['image_url'],
     32                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     33                            offer['offer_description'],
     34                            offer['offer_shop_code'])
     35    database_offers.append(phoneOffer)
     36
     37new_offers = []
    2738
    2839for i in range(1, 3):
     
    5465            offer_name = brand + ' ' + offer_name
    5566
    56         price = int(unicodedata.normalize('NFKD', phone.find('span', {'class': 'woocommerce-Price-amount amount'})
    57                                           .find('bdi').get_text().replace(',', '').replace('ден', '').strip()))
     67        price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'})
     68        price = None
     69
     70        if price_tag is not None:
     71            price = int(unicodedata.normalize('NFKD', price_tag.find('bdi').get_text()
     72                                          .replace(',', '')
     73                                          .replace('ден', '').strip()))
     74        else:
     75            continue
    5876
    5977        response2 = requests.get(offer_url)
     
    6583        front_camera = None
    6684        rom_memory = None
     85        ram_memory = None
     86        operating_system = None
     87        cpu = None
     88        chipset = None
     89        offer_description = None
     90        offer_shop_code = None
    6791        battery = None
    6892        color = None
     
    84108                color = specification.find('td').get_text().strip()
    85109
     110        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     111                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     112                                     image_url,
     113                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    86114
     115for new_offer in new_offers:
     116    flag = False
     117    flag_price = False
     118    offer_id = None
    87119
    88         insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name , price, offer_url, image_url, ' \
    89                         'rom_memory, battery, color, front_camera, back_camera, last_updated, is_validated)' \
    90                                 ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    91         insert_value = (offer_shop, brand, offer_name, price, offer_url, image_url, rom_memory, battery, color,
    92                         front_camera, back_camera, last_updated, is_validated)
    93         cur.execute(insert_script, insert_value)
    94         db_connection.commit()
     120    for old_offer in database_offers:
    95121
    96 cur.close()
    97 db_connection.close()
     122        if new_offer.offer_name == old_offer.offer_name:
     123            flag = True
     124            if new_offer.price != old_offer.price:
     125                flag_price = True
     126                offer_id = old_offer.offer_id
     127
     128    if flag:
     129        # print('ALREADY IN DATABASE')
     130        # print(new_offer)
     131        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     132        if flag_price:
     133            print('PRICE CHANGED!')  # CHANGE PRICE
     134            print('offer id: ' + str(offer_id))
     135            headers = {'Content-type': 'application/json'}
     136            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     137                         headers=headers)
     138    else:
     139        print('ADDED')  # ADD OFFER
     140        print(new_offer)
     141        headers = {'Content-type': 'application/json'}
     142        requests.post('http://localhost:8080/phoneoffer/addoffer',
     143                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     144
     145print('------------------------------------')
     146
     147for old_offer in database_offers:
     148    flag = False
     149    for new_offer in new_offers:
     150        if old_offer.offer_name == new_offer.offer_name:
     151            flag = True
     152
     153    if not flag:
     154        print('OFFER DELETED')
     155        print(old_offer)
     156        # DELETE OFFER
     157        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
  • phonelux_scrappers/scrappers/mobitech_scrapper.py

    r48f3030 r895cd87  
     1import json
     2import unicodedata
    13from datetime import datetime
    24
     
    57from bs4 import BeautifulSoup
    68import requests
     9import sys
    710
    8 # import sys
    9 # file_path = 'outputfile.txt'
    10 # sys.stdout = open(file_path, "w")
     11from classes.phoneoffer import PhoneOffer
    1112
    12 # Call to read the configuration file and connect to database
    13 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    14 db_connection = psycopg2.connect(
    15     database=cinfo[0],
    16     host=cinfo[1],
    17     user=cinfo[2],
    18     password=cinfo[3]
    19 )
    20 cur = db_connection.cursor()
     13file_path = 'outputfile.txt'
     14sys.stdout = open(file_path, "w")
     15
    2116
    2217mobitech_url = "https://mobitech.mk/shop/"
     
    2924
    3025offer_shop = "Mobitech"  # offer shop
     26last_updated = datetime.now().date()
    3127is_validated = False
     28
     29# Mobitech phone offers that are already in database
     30
     31offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text))
     32
     33database_offers = []
     34
     35for offer in offers:
     36    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     37                            offer['ram_memory'],
     38                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     39                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     40                            offer['image_url'],
     41                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     42                            offer['offer_description'],
     43                            offer['offer_shop_code'])
     44    database_offers.append(phoneOffer)
     45
     46new_offers = []
    3247
    3348for phone in phones:
     
    4055    temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi')
    4156    price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price
    42     last_updated = datetime.now().date()  # offer last_updated date
    4357
    4458    response2 = requests.get(offer_url)
     
    4761    specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'})
    4862
    49     ram_memory = ""
    50     rom_memory = ""
    51     battery = ""
    52     back_camera = ""
    53     operating_system = ""
     63    ram_memory = None
     64    rom_memory = None
     65    battery = None
     66    back_camera = None
     67    front_camera = None
     68    operating_system = None
     69    chipset = None
     70    color = None
     71    offer_shop_code = None
     72    cpu = None
     73    offer_description = None
    5474
    5575    for specification in specifications:
     
    6282        # ram memory
    6383        if specification.get_text().startswith("РАМ Меморија:"):
    64             ram_memory = specification.get_text().split("РАМ Меморија:")[1].strip()
     84            ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\
     85                .replace('Ram', '').strip()
    6586            if ram_memory == "Нема" or ram_memory == "/":
    6687                ram_memory = None
     
    84105                battery = None
    85106
    86     insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory,' \
    87                     ' rom_memory, battery, back_camera, last_updated, operating_system, is_validated)' \
    88                     ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    89     insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, ram_memory,
    90                     rom_memory, battery, back_camera, last_updated, operating_system, is_validated)
    91     cur.execute(insert_script, insert_value)
    92     db_connection.commit()
     107    new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     108                                 color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     109                                 image_url,
     110                                 offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    93111
    94 cur.close()
    95 db_connection.close()
     112for new_offer in new_offers:
     113    flag = False
     114    flag_price = False
     115    offer_id = None
     116
     117    for old_offer in database_offers:
     118
     119        if new_offer.offer_name == old_offer.offer_name:
     120            flag = True
     121            if new_offer.price != old_offer.price:
     122                flag_price = True
     123                offer_id = old_offer.offer_id
     124
     125    if flag:
     126        print('ALREADY IN DATABASE')
     127        print(new_offer)
     128        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     129        if flag_price:
     130            print('PRICE CHANGED!')  # CHANGE PRICE
     131            print('offer id: ' + str(offer_id))
     132            headers = {'Content-type': 'application/json'}
     133            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     134                         headers=headers)
     135    else:
     136        print('ADDED')  # ADD OFFER
     137        print(new_offer)
     138        headers = {'Content-type': 'application/json'}
     139        requests.post('http://localhost:8080/phoneoffer/addoffer',
     140                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     141
     142print('------------------------------------')
     143
     144for old_offer in database_offers:
     145    flag = False
     146    for new_offer in new_offers:
     147        if old_offer.offer_name == new_offer.offer_name:
     148            flag = True
     149
     150    if not flag:
     151        print('OFFER DELETED')
     152        print(old_offer)
     153        # DELETE OFFER
     154        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     155
  • phonelux_scrappers/scrappers/neptun_scrapper.py

    r48f3030 r895cd87  
     1import json
    12import unicodedata
    23from datetime import datetime
     
    910import sys
    1011
     12from classes.phoneoffer import PhoneOffer
     13
    1114file_path = 'outputfile.txt'
    1215sys.stdout = open(file_path, "w")
    13 
    14 # Call to read the configuration file and connect to database
    15 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    16 db_connection = psycopg2.connect(
    17     database=cinfo[0],
    18     host=cinfo[1],
    19     user=cinfo[2],
    20     password=cinfo[3]
    21 )
    22 cur = db_connection.cursor()
    2316
    2417offer_shop = "Neptun"  # offer shop
     
    2619is_validated = False
    2720
     21# Neptun phone offers that are already in database
     22
     23offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/neptun').text))
     24
     25database_offers = []
     26
     27for offer in offers:
     28    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     29                            offer['ram_memory'],
     30                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     31                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     32                            offer['image_url'],
     33                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     34                            offer['offer_description'],
     35                            offer['offer_shop_code'])
     36    database_offers.append(phoneOffer)
     37
     38new_offers = []
     39
    2840for i in range(1, 11):
    29     neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page='+str(i)
     41    neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page=' + str(i)
    3042
    3143    # selenium is used because of the dynamic content of the page
     
    7284        offer_description = specifications_table.get_text(separator='\n').strip()
    7385
     86        back_camera = None
    7487        operating_system = None
    7588        chipset = None
     
    7891        rom_memory = None
    7992        cpu = None
     93        front_camera = None
     94        color = None
     95
    8096        for specification in specifications:
    8197            if 'Батерија:' in specification:
     
    105121                operating_system = specification
    106122
    107         insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name , price, image_url, offer_url,' \
    108                         'offer_shop_code, operating_system, battery, chipset, cpu, ram_memory, rom_memory, ' \
    109                         'offer_description, last_updated, is_validated)' \
    110                         ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    111         insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url,
    112                         offer_shop_code, operating_system, battery, chipset, cpu, ram_memory, rom_memory, offer_description,
    113                         last_updated, is_validated)
    114         cur.execute(insert_script, insert_value)
    115         db_connection.commit()
     123        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     124                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     125                                     image_url,
     126                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    116127
    117 cur.close()
    118 db_connection.close()
     128for new_offer in new_offers:
     129    flag = False
     130    flag_price = False
     131    offer_id = None
     132
     133    for old_offer in database_offers:
     134
     135        if new_offer.offer_shop_code == old_offer.offer_shop_code:
     136            flag = True
     137            if new_offer.price != old_offer.price:
     138                flag_price = True
     139                offer_id = old_offer.offer_id
     140
     141    if flag:
     142        # print('ALREADY IN DATABASE')
     143        # print(new_offer)
     144        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     145        if flag_price:
     146            print('PRICE CHANGED!')  # CHANGE PRICE
     147            print('offer id: ' + str(offer_id))
     148            headers = {'Content-type': 'application/json'}
     149            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     150                         headers=headers)
     151    else:
     152        print('ADDED')  # ADD OFFER
     153        print(new_offer)
     154        headers = {'Content-type': 'application/json'}
     155        requests.post('http://localhost:8080/phoneoffer/addoffer',
     156                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     157
     158print('------------------------------------')
     159
     160for old_offer in database_offers:
     161    flag = False
     162    for new_offer in new_offers:
     163        if old_offer.offer_shop_code == new_offer.offer_shop_code:
     164            flag = True
     165
     166    if not flag:
     167        print('OFFER DELETED')
     168        print(old_offer)
     169        # DELETE OFFER
     170        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
  • phonelux_scrappers/scrappers/outputfile.txt

    r48f3030 r895cd87  
     1ADDED
     2{'offer_shop': 'Mobile Zone', 'offer_name': 'Apple iPhone 14 Pro', 'price': 95499, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Златна, Розева, Сива, Црна', 'front_camera': '12MP', 'back_camera': '48 Mp + 12 Mp + 12 Mp', 'chipset': None, 'battery': '3200mAh', 'operating_system': None, 'cpu': None, 'image_url': 'https://i0.wp.com/mobilezone.mk/wp-content/uploads/2022/09/14-pro-silver.png?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/iphone-14-pro/', 'last_updated': datetime.date(2022, 10, 1), 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}
     3------------------------------------
     4OFFER DELETED
     5{'offer_id': 1179, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung s20 FE', 'price': 24699, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Сина', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i2.wp.com/mobilezone.mk/wp-content/uploads/2022/03/Samsung-Galaxy-S20-FE-blue.png?resize=512%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-s20-fe/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}
     6OFFER DELETED
     7{'offer_id': 1181, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung Z Flip3 5G', 'price': 39999, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Црна', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i2.wp.com/mobilezone.mk/wp-content/uploads/2022/03/11.png?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-z-flip3-5g/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}
     8OFFER DELETED
     9{'offer_id': 1180, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung S21 FE 5G', 'price': 30899, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Зелена, Црна', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i1.wp.com/mobilezone.mk/wp-content/uploads/2022/03/5g.jpg?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-s21-fe-5g/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}
  • phonelux_scrappers/scrappers/setec_scrapper.py

    r48f3030 r895cd87  
     1import json
    12import unicodedata
    23from datetime import datetime
    3 
    44import psycopg2
    55import config_read
    66from bs4 import BeautifulSoup
    77import requests
     8import sys
    89
    9 import sys
     10from classes.phoneoffer import PhoneOffer
    1011
    1112file_path = 'outputfile.txt'
    1213sys.stdout = open(file_path, "w")
    13 
    14 # Call to read the configuration file and connect to database
    15 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    16 db_connection = psycopg2.connect(
    17     database=cinfo[0],
    18     host=cinfo[1],
    19     user=cinfo[2],
    20     password=cinfo[3]
    21 )
    22 cur = db_connection.cursor()
    2314
    2415offer_shop = "Setec"  # offer shop
     
    2617is_validated = False
    2718
    28 for i in range(1, 7):
    29     setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page='+str(i)
     19# Setec phone offers that are already in database
     20
     21offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/setec').text))
     22
     23database_offers = []
     24
     25for offer in offers:
     26    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     27                            offer['ram_memory'],
     28                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     29                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     30                            offer['image_url'],
     31                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     32                            offer['offer_description'],
     33                            offer['offer_shop_code'])
     34    database_offers.append(phoneOffer)
     35
     36new_offers = []
     37
     38for i in range(1, 9):
     39    setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' + str(i)
    3040
    3141    response1 = requests.get(setec_url)
     
    4151        brand = offer_name.split(' ')[0]
    4252
     53        back_camera = None
     54        operating_system = None
     55        chipset = None
     56        battery = None
     57        ram_memory = None
     58        rom_memory = None
     59        cpu = None
     60        front_camera = None
     61        color = None
     62
    4363        if 'Cable' in offer_name or 'AirTag' in offer_name:
    4464            continue
     
    4969        offer_shop_code = phone.find('div', {'class': 'right'}) \
    5070            .find('div', {'class': 'shifra'}).get_text().replace('Шифра:', '').strip()
    51         price = int(phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
    52                     find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'}) \
    53                     .get_text().replace('Ден.', '').replace(',', '').strip())
     71
     72        price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
     73            find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'})
     74
     75        if price_tag is None:
     76            price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
     77                find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'cena_za_kesh'})
     78
     79        price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip())
    5480
    5581        response2 = requests.get(offer_url)
     
    5884        offer_description = soup2.find('div', {'id': 'tab-description'}).get_text(separator='\n')
    5985
    60         insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name , price, image_url, offer_url,' \
    61                         'offer_shop_code, offer_description, last_updated, is_validated)' \
    62                         ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    63         insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url,
    64                         offer_shop_code, offer_description, last_updated, is_validated)
    65         cur.execute(insert_script, insert_value)
    66         db_connection.commit()
     86        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     87                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     88                                     image_url,
     89                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    6790
    68 cur.close()
    69 db_connection.close()
     91for new_offer in new_offers:
     92    flag = False
     93    flag_price = False
     94    offer_id = None
     95
     96    for old_offer in database_offers:
     97
     98        if new_offer.offer_shop_code == old_offer.offer_shop_code:
     99            flag = True
     100            if new_offer.price != old_offer.price:
     101                flag_price = True
     102                offer_id = old_offer.offer_id
     103
     104    if flag:
     105        # print('ALREADY IN DATABASE')
     106        # print(new_offer)
     107        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     108        if flag_price:
     109            print('PRICE CHANGED!')  # CHANGE PRICE
     110            print('offer id: ' + str(offer_id))
     111            headers = {'Content-type': 'application/json'}
     112            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     113                         headers=headers)
     114    else:
     115        print('ADDED')  # ADD OFFER
     116        print(new_offer)
     117        headers = {'Content-type': 'application/json'}
     118        requests.post('http://localhost:8080/phoneoffer/addoffer',
     119                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     120
     121print('------------------------------------')
     122
     123for old_offer in database_offers:
     124    flag = False
     125    for new_offer in new_offers:
     126        if old_offer.offer_shop_code == new_offer.offer_shop_code:
     127            flag = True
     128
     129    if not flag:
     130        print('OFFER DELETED')
     131        print(old_offer)
     132        # DELETE OFFER
     133        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
  • phonelux_scrappers/scrappers/tehnomarket_scrapper.py

    r48f3030 r895cd87  
     1import json
    12import unicodedata
    23from datetime import datetime
     
    67from selenium import webdriver
    78import requests
     9import sys
    810
    9 import sys
     11from classes.phoneoffer import PhoneOffer
    1012
    1113file_path = 'outputfile.txt'
    1214sys.stdout = open(file_path, "w")
    1315
    14 # Call to read the configuration file and connect to database
    15 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    16 db_connection = psycopg2.connect(
    17     database=cinfo[0],
    18     host=cinfo[1],
    19     user=cinfo[2],
    20     password=cinfo[3]
    21 )
    22 cur = db_connection.cursor()
    2316
    24 
    25 def scrape_function(driver1, i):
     17def scrape_function(driver1, i, new_offers):
    2618    offer_shop = "Tehnomarket"  # offer shop
    2719    last_updated = datetime.now().date()
     
    5951            offer_shop_code = details[4].strip()
    6052
     53            back_camera = None
     54            operating_system = None
     55            chipset = None
     56            battery = None
     57            ram_memory = None
     58            rom_memory = None
     59            cpu = None
     60            front_camera = None
     61            color = None
     62
    6163            specifications = []
    6264            for info in soup2.find_all('span', {'class': 'info'}):
    6365                specifications.append(info.get_text())
    6466
    65             print(brand)
    66             print(offer_name)
    67             print()
    68             print()
    69 
    7067            offer_description = '\n'.join(specifications)
    7168
    72             insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \
    73                             'offer_description, offer_shop_code, last_updated, is_validated)' \
    74                             ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    75             insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, offer_description,
    76                             offer_shop_code, last_updated, is_validated)
    77             cur.execute(insert_script, insert_value)
    78             db_connection.commit()
     69            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     70                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     71                                         image_url,
     72                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    7973    else:
    8074        driver1.implicitly_wait(30)
    81         scrape_function(driver1, i)
     75        scrape_function(driver1, i, new_offers)
    8276
     77
     78# Tehnomarket phone offers that are already in database
     79
     80offers = json.loads(
     81    unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/tehnomarket').text))
     82
     83database_offers = []
     84
     85for offer in offers:
     86    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     87                            offer['ram_memory'],
     88                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     89                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     90                            offer['image_url'],
     91                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     92                            offer['offer_description'],
     93                            offer['offer_shop_code'])
     94    database_offers.append(phoneOffer)
     95
     96new_offers = []
    8397
    8498for i in range(1, 6):
     
    90104    driver1.get(tehnomarket_url)
    91105
    92     scrape_function(driver1, i)
     106    scrape_function(driver1, i, new_offers)
     107
    93108    # closing the driver so the safari instance can pair with another webdriver session
    94109    driver1.close()
    95110
    96 cur.close()
    97 db_connection.close()
     111for new_offer in new_offers:
     112    flag = False
     113    flag_price = False
     114    offer_id = None
     115
     116    for old_offer in database_offers:
     117
     118        if new_offer.offer_shop_code == old_offer.offer_shop_code:
     119            flag = True
     120            if new_offer.price != old_offer.price:
     121                flag_price = True
     122                offer_id = old_offer.offer_id
     123
     124    if flag:
     125        # print('ALREADY IN DATABASE')
     126        # print(new_offer)
     127        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     128        if flag_price:
     129            print('PRICE CHANGED!')  # CHANGE PRICE
     130            print('offer id: ' + str(offer_id))
     131            headers = {'Content-type': 'application/json'}
     132            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     133                         headers=headers)
     134    else:
     135        print('ADDED')  # ADD OFFER
     136        print(new_offer)
     137        headers = {'Content-type': 'application/json'}
     138        requests.post('http://localhost:8080/phoneoffer/addoffer',
     139                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     140
     141print('------------------------------------')
     142
     143for old_offer in database_offers:
     144    flag = False
     145    for new_offer in new_offers:
     146        if old_offer.offer_shop_code == new_offer.offer_shop_code:
     147            flag = True
     148
     149    if not flag:
     150        print('OFFER DELETED')
     151        print(old_offer)
     152        # DELETE OFFER
     153        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
Note: See TracChangeset for help on using the changeset viewer.