Ignore:
Timestamp:
11/20/22 16:34:52 (2 years ago)
Author:
Marko <Marko@…>
Branches:
master
Parents:
ffd50db
Message:

Final features implemented

File:
1 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/handy_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    2021is_validated = False
    2122
    22 # Handy phone offers that are already in database
    23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/handy').text))
     23# Call to read the configuration file and connect to database
     24cinfo = config_read.get_databaseconfig("../postgresdb.config")
     25db_connection = psycopg2.connect(
     26    database=cinfo[0],
     27    host=cinfo[1],
     28    user=cinfo[2],
     29    password=cinfo[3]
     30)
     31cur = db_connection.cursor()
    2432
    25 database_offers = []
     33try:
     34    # Handy phone offers that are already in database
     35    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/handy').text))
    2636
    27 for offer in offers:
    28     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    29                             offer['ram_memory'],
    30                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    31                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    32                             offer['image_url'],
    33                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    34                             offer['offer_description'],
    35                             offer['offer_shop_code'])
    36     database_offers.append(phoneOffer)
     37    database_offers = []
    3738
    38 new_offers = []
     39    for offer in offers:
     40        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     41                                offer['ram_memory'],
     42                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     43                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     44                                offer['image_url'],
     45                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     46                                offer['offer_description'],
     47                                offer['offer_shop_code'])
     48        database_offers.append(phoneOffer)
    3949
    40 handy_url = 'https://www.handy.mk/telefoni?page=6'
     50    new_offers = []
    4151
    42 response1 = requests.get(handy_url)
    43 soup1 = BeautifulSoup(response1.content, 'html.parser')
     52    handy_url = 'https://www.handy.mk/telefoni?page=6'
    4453
    45 phones = soup1.find_all('li', {'data-hook': 'product-list-grid-item'})
     54    response1 = requests.get(handy_url)
     55    soup1 = BeautifulSoup(response1.content, 'html.parser')
    4656
    47 for phone in phones:
    48     offer_url = phone.find('a').get('href')
    49     offer_name = phone.find('div', {'data-hook': 'not-image-container'})\
    50         .find('h3', {'data-hook': 'product-item-name'}).get_text().strip()
    51     brand = offer_name.split(' ')[0].capitalize()
    52     price = int(float(phone.find('div', {'data-hook': 'not-image-container'}).find('div', {'data-hook': "product-item-product-details"})\
    53         .find('span', {'data-hook': 'product-item-price-to-pay'}).get_text().strip().replace('ден', '').replace('.', '').replace(',', '.')))
     57    phones = soup1.find_all('li', {'data-hook': 'product-list-grid-item'})
    5458
    55     response2 = requests.get(offer_url)
    56     soup2 = BeautifulSoup(response2.text, 'html.parser')
     59    for phone in phones:
     60        offer_url = phone.find('a').get('href')
     61        offer_name = phone.find('div', {'data-hook': 'not-image-container'})\
     62            .find('h3', {'data-hook': 'product-item-name'}).get_text().strip()
     63        brand = offer_name.split(' ')[0].capitalize()
     64        price = int(float(phone.find('div', {'data-hook': 'not-image-container'}).find('div', {'data-hook': "product-item-product-details"})\
     65            .find('span', {'data-hook': 'product-item-price-to-pay'}).get_text().strip().replace('ден', '').replace('.', '').replace(',', '.')))
    5766
    58     back_camera = None
    59     operating_system = None
    60     chipset = None
    61     battery = None
    62     ram_memory = None
    63     rom_memory = None
    64     cpu = None
    65     front_camera = None
    66     offer_shop_code = None
    67     color = None
    68     image_url = None
     67        response2 = requests.get(offer_url)
     68        soup2 = BeautifulSoup(response2.text, 'html.parser')
    6969
    70     color_section = soup2.find('section', {'data-hook': 'product-colors-title-section'})
    71     if color_section is not None:
    72         temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase3548966286__container'})\
    73             .find_all('input', {'type': 'radio'})
    74         colors_list = []
    75         for temp_color in temp_colors:
    76             colors_list.append(temp_color.get('aria-label'))
    77         color = ','.join(colors_list)
     70        back_camera = None
     71        operating_system = None
     72        chipset = None
     73        battery = None
     74        ram_memory = None
     75        rom_memory = None
     76        cpu = None
     77        front_camera = None
     78        offer_shop_code = None
     79        color = None
     80        image_url = None
    7881
    79     rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('li')
     82        color_section = soup2.find('section', {'data-hook': 'product-colors-title-section'})
     83        if color_section is not None:
     84            temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase3548966286__container'})\
     85                .find_all('input', {'type': 'radio'})
     86            colors_list = []
     87            for temp_color in temp_colors:
     88                colors_list.append(temp_color.get('aria-label'))
     89            color = ','.join(colors_list)
    8090
    81     if len(rows) == 0:
    82         rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('tr')
     91        rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('li')
    8392
    84     specifications = []
     93        if len(rows) == 0:
     94            rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('tr')
    8595
    86     for row in rows:
    87         specifications.append(unicodedata.normalize('NFKD', row.get_text().strip()))
     96        specifications = []
    8897
    89     offer_description = '\n'.join(specifications)
     98        for row in rows:
     99            specifications.append(unicodedata.normalize('NFKD', row.get_text().strip()))
    90100
    91     new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    92                                  color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    93                                  image_url,
    94                                  offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     101        offer_description = '\n'.join(specifications)
    95102
    96 for new_offer in new_offers:
    97     flag = False
    98     flag_price = False
    99     offer_id = None
     103        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     104                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     105                                     image_url,
     106                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     107
     108    for new_offer in new_offers:
     109        flag = False
     110        flag_price = False
     111        offer_id = None
     112
     113        for old_offer in database_offers:
     114
     115            if new_offer.offer_name == old_offer.offer_name:
     116                flag = True
     117                if new_offer.price != old_offer.price:
     118                    flag_price = True
     119                    offer_id = old_offer.offer_id
     120
     121        if flag:
     122            # print('ALREADY IN DATABASE')
     123            # print(new_offer)
     124            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     125            if flag_price:
     126                print('PRICE CHANGED!')  # CHANGE PRICE
     127                print('offer id: ' + str(offer_id))
     128                headers = {'Content-type': 'application/json'}
     129                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     130                             headers=headers)
     131        else:
     132            print('ADDED')  # ADD OFFER
     133            print(new_offer)
     134            headers = {'Content-type': 'application/json'}
     135            requests.post('http://localhost:8080/phoneoffer/addoffer',
     136                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     137
     138    print('------------------------------------')
    100139
    101140    for old_offer in database_offers:
     141        flag = False
     142        for new_offer in new_offers:
     143            if old_offer.offer_name == new_offer.offer_name:
     144                flag = True
    102145
    103         if new_offer.offer_name == old_offer.offer_name:
    104             flag = True
    105             if new_offer.price != old_offer.price:
    106                 flag_price = True
    107                 offer_id = old_offer.offer_id
    108 
    109     if flag:
    110         # print('ALREADY IN DATABASE')
    111         # print(new_offer)
    112         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    113         if flag_price:
    114             print('PRICE CHANGED!')  # CHANGE PRICE
    115             print('offer id: ' + str(offer_id))
    116             headers = {'Content-type': 'application/json'}
    117             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    118                          headers=headers)
    119     else:
    120         print('ADDED')  # ADD OFFER
    121         print(new_offer)
    122         headers = {'Content-type': 'application/json'}
    123         requests.post('http://localhost:8080/phoneoffer/addoffer',
    124                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    125 
    126 print('------------------------------------')
    127 
    128 for old_offer in database_offers:
    129     flag = False
    130     for new_offer in new_offers:
    131         if old_offer.offer_name == new_offer.offer_name:
    132             flag = True
    133 
    134     if not flag:
    135         print('OFFER DELETED')
    136         print(old_offer)
    137         # DELETE OFFER
    138         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     146        if not flag:
     147            print('OFFER DELETED')
     148            print(old_offer)
     149            # DELETE OFFER
     150            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     151except Exception:
     152    traceback.print_exc()
     153    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     154                    ' VALUES (%s, %s, %s);'
     155    insert_value = (offer_shop, last_updated, 'failed')
     156    cur.execute(insert_script, insert_value)
     157    db_connection.commit()
     158    cur.close()
     159    db_connection.close()
     160else:
     161    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     162                    ' VALUES (%s, %s, %s);'
     163    insert_value = (offer_shop, last_updated, 'success')
     164    cur.execute(insert_script, insert_value)
     165    db_connection.commit()
     166    cur.close()
     167    db_connection.close()
    139168
    140169
     170
Note: See TracChangeset for help on using the changeset viewer.