Ignore:
Timestamp:
11/20/22 16:34:52 (2 years ago)
Author:
Marko <Marko@…>
Branches:
master
Parents:
ffd50db
Message:

Final features implemented

File:
1 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/mobitech_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    1415sys.stdout = open(file_path, "w")
    1516
    16 
    17 mobitech_url = "https://mobitech.mk/shop/"
    18 
    19 response1 = requests.get(mobitech_url)
    20 
    21 soup1 = BeautifulSoup(response1.content, 'html.parser')
    22 
    23 phones = soup1.find_all('div', {'class': 'jet-woo-products__inner-box'})
    24 
    2517offer_shop = "Mobitech"  # offer shop
    2618last_updated = datetime.now().date()
    2719is_validated = False
    2820
    29 # Mobitech phone offers that are already in database
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
    3030
    31 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text))
     31try:
     32    mobitech_url = "https://mobitech.mk/shop/"
    3233
    33 database_offers = []
     34    response1 = requests.get(mobitech_url)
    3435
    35 for offer in offers:
    36     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    37                             offer['ram_memory'],
    38                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    39                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    40                             offer['image_url'],
    41                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    42                             offer['offer_description'],
    43                             offer['offer_shop_code'])
    44     database_offers.append(phoneOffer)
     36    soup1 = BeautifulSoup(response1.content, 'html.parser')
    4537
    46 new_offers = []
     38    phones = soup1.find_all('div', {'class': 'jet-woo-products__inner-box'})
    4739
    48 for phone in phones:
    49     offer_url = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get('href')  # url
    50     image_url = phone.find('div', {'class': 'jet-woo-product-thumbnail'}).find('img').get('src')  # image
    51     brand = phone.find_next('div', {'class': 'jet-woo-product-categories'}).find('a').get_text().strip()  # brand
    52     offer_name = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get_text().strip()  # offer_name
    53     if brand not in offer_name:
    54         offer_name = brand+" "+offer_name
    55     temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi')
    56     price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price
     40    # Mobitech phone offers that are already in database
     41    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text))
    5742
    58     response2 = requests.get(offer_url)
    59     soup2 = BeautifulSoup(response2.content, 'html.parser')
     43    database_offers = []
    6044
    61     specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'})
     45    for offer in offers:
     46        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     47                                offer['ram_memory'],
     48                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     49                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     50                                offer['image_url'],
     51                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     52                                offer['offer_description'],
     53                                offer['offer_shop_code'])
     54        database_offers.append(phoneOffer)
    6255
    63     ram_memory = None
    64     rom_memory = None
    65     battery = None
    66     back_camera = None
    67     front_camera = None
    68     operating_system = None
    69     chipset = None
    70     color = None
    71     offer_shop_code = None
    72     cpu = None
    73     offer_description = None
     56    new_offers = []
    7457
    75     for specification in specifications:
    76         # rom memory
    77         if specification.get_text().startswith("Меморија:"):
    78             rom_memory = specification.get_text().split("Меморија:")[1].strip()
    79             if rom_memory == "Нема" or rom_memory == "/":
    80                 rom_memory = None
     58    for phone in phones:
     59        offer_url = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get('href')  # url
     60        image_url = phone.find('div', {'class': 'jet-woo-product-thumbnail'}).find('img').get('src')  # image
     61        brand = phone.find_next('div', {'class': 'jet-woo-product-categories'}).find('a').get_text().strip()  # brand
     62        offer_name = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get_text().strip()  # offer_name
     63        if brand not in offer_name:
     64            offer_name = brand+" "+offer_name
     65        temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi')
     66        price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price
    8167
    82         # ram memory
    83         if specification.get_text().startswith("РАМ Меморија:"):
    84             ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\
    85                 .replace('Ram', '').strip()
    86             if ram_memory == "Нема" or ram_memory == "/":
    87                 ram_memory = None
     68        response2 = requests.get(offer_url)
     69        soup2 = BeautifulSoup(response2.content, 'html.parser')
    8870
    89         # camera
    90         if specification.get_text().startswith("Камера:"):
    91             back_camera = specification.get_text().split("Камера:")[1].strip()
    92             if back_camera == "Нема":
    93                 back_camera = None
     71        specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'})
    9472
    95         # operating system
    96         if specification.get_text().startswith("Оперативен систем:"):
    97             operating_system = specification.get_text().split("Оперативен систем:")[1].split(",")[0].strip()
    98             if operating_system == "Нема":
    99                 operating_system = None
     73        ram_memory = None
     74        rom_memory = None
     75        battery = None
     76        back_camera = None
     77        front_camera = None
     78        operating_system = None
     79        chipset = None
     80        color = None
     81        offer_shop_code = None
     82        cpu = None
     83        offer_description = None
    10084
    101         # battery
    102         if specification.get_text().startswith("Батерија:"):
    103             battery = specification.get_text().split("Батерија:")[1].strip()
    104             if battery == "Нема":
    105                 battery = None
     85        for specification in specifications:
     86            # rom memory
     87            if specification.get_text().startswith("Меморија:"):
     88                rom_memory = specification.get_text().split("Меморија:")[1].strip()
     89                if rom_memory == "Нема" or rom_memory == "/":
     90                    rom_memory = None
    10691
    107     new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    108                                  color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    109                                  image_url,
    110                                  offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     92            # ram memory
     93            if specification.get_text().startswith("РАМ Меморија:"):
     94                ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\
     95                    .replace('Ram', '').strip()
     96                if ram_memory == "Нема" or ram_memory == "/":
     97                    ram_memory = None
    11198
    112 for new_offer in new_offers:
    113     flag = False
    114     flag_price = False
    115     offer_id = None
     99            # camera
     100            if specification.get_text().startswith("Камера:"):
     101                back_camera = specification.get_text().split("Камера:")[1].strip()
     102                if back_camera == "Нема":
     103                    back_camera = None
     104
     105            # operating system
     106            if specification.get_text().startswith("Оперативен систем:"):
     107                operating_system = specification.get_text().split("Оперативен систем:")[1].split(",")[0].strip()
     108                if operating_system == "Нема":
     109                    operating_system = None
     110
     111            # battery
     112            if specification.get_text().startswith("Батерија:"):
     113                battery = specification.get_text().split("Батерија:")[1].strip()
     114                if battery == "Нема":
     115                    battery = None
     116
     117        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     118                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     119                                     image_url,
     120                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     121
     122    for new_offer in new_offers:
     123        flag = False
     124        flag_price = False
     125        offer_id = None
     126
     127        for old_offer in database_offers:
     128
     129            if new_offer.offer_name == old_offer.offer_name:
     130                flag = True
     131                if new_offer.price != old_offer.price:
     132                    flag_price = True
     133                    offer_id = old_offer.offer_id
     134
     135        if flag:
     136            print('ALREADY IN DATABASE')
     137            print(new_offer)
     138            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     139            if flag_price:
     140                print('PRICE CHANGED!')  # CHANGE PRICE
     141                print('offer id: ' + str(offer_id))
     142                headers = {'Content-type': 'application/json'}
     143                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     144                             headers=headers)
     145        else:
     146            print('ADDED')  # ADD OFFER
     147            print(new_offer)
     148            headers = {'Content-type': 'application/json'}
     149            requests.post('http://localhost:8080/phoneoffer/addoffer',
     150                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     151
     152    print('------------------------------------')
    116153
    117154    for old_offer in database_offers:
     155        flag = False
     156        for new_offer in new_offers:
     157            if old_offer.offer_name == new_offer.offer_name:
     158                flag = True
    118159
    119         if new_offer.offer_name == old_offer.offer_name:
    120             flag = True
    121             if new_offer.price != old_offer.price:
    122                 flag_price = True
    123                 offer_id = old_offer.offer_id
     160        if not flag:
     161            print('OFFER DELETED')
     162            print(old_offer)
     163            # DELETE OFFER
     164            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     165except Exception:
     166    traceback.print_exc()
     167    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     168                    ' VALUES (%s, %s, %s);'
     169    insert_value = (offer_shop, last_updated, 'failed')
     170    cur.execute(insert_script, insert_value)
     171    db_connection.commit()
     172    cur.close()
     173    db_connection.close()
     174else:
     175    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     176                    ' VALUES (%s, %s, %s);'
     177    insert_value = (offer_shop, last_updated, 'success')
     178    cur.execute(insert_script, insert_value)
     179    db_connection.commit()
     180    cur.close()
     181    db_connection.close()
    124182
    125     if flag:
    126         print('ALREADY IN DATABASE')
    127         print(new_offer)
    128         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    129         if flag_price:
    130             print('PRICE CHANGED!')  # CHANGE PRICE
    131             print('offer id: ' + str(offer_id))
    132             headers = {'Content-type': 'application/json'}
    133             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    134                          headers=headers)
    135     else:
    136         print('ADDED')  # ADD OFFER
    137         print(new_offer)
    138         headers = {'Content-type': 'application/json'}
    139         requests.post('http://localhost:8080/phoneoffer/addoffer',
    140                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    141 
    142 print('------------------------------------')
    143 
    144 for old_offer in database_offers:
    145     flag = False
    146     for new_offer in new_offers:
    147         if old_offer.offer_name == new_offer.offer_name:
    148             flag = True
    149 
    150     if not flag:
    151         print('OFFER DELETED')
    152         print(old_offer)
    153         # DELETE OFFER
    154         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
    155 
Note: See TracChangeset for help on using the changeset viewer.