Ignore:
Timestamp:
11/20/22 16:34:52 (20 months ago)
Author:
Marko <Marko@…>
Branches:
master
Parents:
ffd50db
Message:

Final features implemented

File:
1 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/a1_scrapper.py

    rffd50db r47f4eaf  
     1import traceback
    12import unicodedata
    23from datetime import datetime
     
    1819is_validated = False
    1920
    20 # A1 phone offers that are already in database
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
    2130
    22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text))
     31try:
     32    # A1 phone offers that are already in database
     33    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text))
    2334
    24 database_offers = []
     35    database_offers = []
    2536
    26 for offer in offers:
    27     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    28                             offer['ram_memory'],
    29                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    30                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    31                             offer['image_url'],
    32                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    33                             offer['offer_description'],
    34                             offer['offer_shop_code'])
    35     database_offers.append(phoneOffer)
     37    for offer in offers:
     38        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     39                                offer['ram_memory'],
     40                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     41                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     42                                offer['image_url'],
     43                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     44                                offer['offer_description'],
     45                                offer['offer_shop_code'])
     46        database_offers.append(phoneOffer)
    3647
    37 a1_url = 'https://www.a1.mk/webshop/mk/phones'
     48    a1_url = 'https://www.a1.mk/webshop/mk/phones'
    3849
    39 response1 = requests.get(a1_url)
    40 soup1 = BeautifulSoup(response1.content, 'html.parser')
     50    response1 = requests.get(a1_url)
     51    soup1 = BeautifulSoup(response1.content, 'html.parser')
    4152
    42 phones = soup1.find('main', {'class', 'gsm-advisor-grid phones'}).find('div', {'class', 'd-flex'}) \
    43     .find_all('div', {'class', 'dvc-idtfr by4'})
     53    phones = soup1.find('main', {'class', 'gsm-advisor-grid phones'}).find('div', {'class', 'd-flex'}) \
     54        .find_all('div', {'class', 'dvc-idtfr by4'})
    4455
    45 new_offers = []
     56    new_offers = []
    4657
    47 for phone in phones:
    48     brand = phone.get('data-brand').strip()
    49     offer_name = brand + " " + phone.get('data-model').strip()
     58    for phone in phones:
     59        brand = phone.get('data-brand').strip()
     60        offer_name = brand + " " + phone.get('data-model').strip()
    5061
    51     # if brand not in offer_name:
    52     #     offer_name = brand+" "+offer_name
     62        # if brand not in offer_name:
     63        #     offer_name = brand+" "+offer_name
    5364
    54     offer_shop_code = phone.get('data-productid').strip()
    55     offer_url = phone.find('a', {'class', 'device-link'}).get('href')
    56     image_url = phone.get('data-image')
     65        offer_shop_code = phone.get('data-productid').strip()
     66        offer_url = phone.find('a', {'class', 'device-link'}).get('href')
     67        image_url = phone.get('data-image')
    5768
    58     response2 = requests.get(offer_url)
    59     soup2 = BeautifulSoup(response2.content, 'html.parser')
     69        response2 = requests.get(offer_url)
     70        soup2 = BeautifulSoup(response2.content, 'html.parser')
    6071
    61     temp_prices = soup2.find('div', {'class': 'ured-tabs-content'}) \
    62         .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div')
     72        temp_prices = soup2.find('div', {'class': 'ured-tabs-content'}) \
     73            .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div')
    6374
    64     # offer price
    65     price = None
    66     for temp_price in temp_prices:
    67         if 'Цена само за уред' in temp_price.get_text().strip():
    68             price = int(temp_price.get_text().replace('Цена само за уред', '')
    69                         .replace('Одбери', '').replace('денари', '').replace('.', '').strip())
     75        # offer price
     76        price = None
     77        for temp_price in temp_prices:
     78            if 'Цена само за уред' in temp_price.get_text().strip():
     79                price = int(temp_price.get_text().replace('Цена само за уред', '')
     80                            .replace('Одбери', '').replace('денари', '').replace('.', '').strip())
    7081
    71     colors_section = soup2.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label')
     82        colors_section = soup2.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label')
    7283
    73     temp_colors = []
    74     for color_section in colors_section:
    75         temp_colors.append(color_section.get('data-content'))
     84        temp_colors = []
     85        for color_section in colors_section:
     86            temp_colors.append(color_section.get('data-content'))
    7687
    77     color = ','.join(temp_colors)  # colors available for the offer
     88        color = ','.join(temp_colors)  # colors available for the offer
    7889
    79     phone_description = soup2.find('div', {'class': 'desc section'}).find('p').get_text().strip()
     90        phone_description = soup2.find('div', {'class': 'desc section'}).find('p').get_text().strip()
    8091
    81     table_rows = soup2.find('table', {'class': 'table karakteristiki'}).find_all('tr')
     92        table_rows = soup2.find('table', {'class': 'table karakteristiki'}).find_all('tr')
    8293
    83     back_camera = None
    84     operating_system = None
    85     cpu = None
    86     rom_memory = None
    87     ram_memory = None
    88     battery = None
    89     front_camera = None
    90     chipset = None
    91     offer_description = None
     94        back_camera = None
     95        operating_system = None
     96        cpu = None
     97        rom_memory = None
     98        ram_memory = None
     99        battery = None
     100        front_camera = None
     101        chipset = None
     102        offer_description = None
    92103
    93     for row in table_rows:
    94         if 'Камера' in row.get_text().strip():
    95             back_camera = row.get_text().replace('Камера', '').strip()
     104        for row in table_rows:
     105            if 'Камера' in row.get_text().strip():
     106                back_camera = row.get_text().replace('Камера', '').strip()
    96107
    97         if 'Оперативен систем' in row.get_text().strip():
    98             operating_system = row.get_text().replace('Оперативен систем', '').strip()
     108            if 'Оперативен систем' in row.get_text().strip():
     109                operating_system = row.get_text().replace('Оперативен систем', '').strip()
    99110
    100         if 'CPU' in row.get_text().strip():
    101             cpu = row.get_text().replace('CPU', '').strip()
     111            if 'CPU' in row.get_text().strip():
     112                cpu = row.get_text().replace('CPU', '').strip()
    102113
    103         if 'Вградена меморија' in row.get_text().strip():
    104             rom_memory = row.get_text().replace('Вградена меморија', '').strip()
     114            if 'Вградена меморија' in row.get_text().strip():
     115                rom_memory = row.get_text().replace('Вградена меморија', '').strip()
    105116
    106         if 'RAM меморија' in row.get_text().strip():
    107             ram_memory = row.get_text().replace('RAM меморија', '').strip()
     117            if 'RAM меморија' in row.get_text().strip():
     118                ram_memory = row.get_text().replace('RAM меморија', '').strip()
    108119
    109         if 'Батерија' in row.get_text().strip():
    110             battery = row.get_text().replace('Батерија', '').strip()
     120            if 'Батерија' in row.get_text().strip():
     121                battery = row.get_text().replace('Батерија', '').strip()
    111122
    112         if 'Предна камера' in row.get_text().strip():
    113             front_camera = row.get_text().replace('Предна камера', '').strip()
     123            if 'Предна камера' in row.get_text().strip():
     124                front_camera = row.get_text().replace('Предна камера', '').strip()
    114125
    115     new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    116                                  color, front_camera, back_camera, chipset, battery, operating_system, cpu, image_url,
    117                                  offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     126        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     127                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     128                                     image_url,
     129                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    118130
    119 for new_offer in new_offers:
    120     flag = False
    121     flag_price = False
    122     offer_id = None
     131    for new_offer in new_offers:
     132        flag = False
     133        flag_price = False
     134        offer_id = None
     135
     136        for old_offer in database_offers:
     137
     138            if new_offer.offer_shop_code == old_offer.offer_shop_code:
     139                flag = True
     140                if new_offer.price != old_offer.price:
     141                    flag_price = True
     142                    offer_id = old_offer.offer_id
     143
     144        if flag:
     145            # print('ALREADY IN DATABASE')
     146            # print(new_offer)
     147            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     148            if flag_price:
     149                print('PRICE CHANGED!')  # CHANGE PRICE
     150                print('offer id: ' + str(offer_id))
     151                headers = {'Content-type': 'application/json'}
     152                requests.put(
     153                    'http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     154                    headers=headers)
     155        else:
     156            print('ADDED')  # ADD OFFER
     157            print(new_offer)
     158            headers = {'Content-type': 'application/json'}
     159            requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers,
     160                          data=json.dumps(new_offer.__dict__,
     161                                          default=str))
     162
     163    print('------------------------------------')
    123164
    124165    for old_offer in database_offers:
     166        flag = False
     167        for new_offer in new_offers:
     168            if old_offer.offer_shop_code == new_offer.offer_shop_code:
     169                flag = True
    125170
    126         if new_offer.offer_shop_code == old_offer.offer_shop_code:
    127             flag = True
    128             if new_offer.price != old_offer.price:
    129                 flag_price = True
    130                 offer_id = old_offer.offer_id
    131 
    132     if flag:
    133         # print('ALREADY IN DATABASE')
    134         # print(new_offer)
    135         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    136         if flag_price:
    137             print('PRICE CHANGED!')  # CHANGE PRICE
    138             print('offer id: ' + str(offer_id))
    139             headers = {'Content-type': 'application/json'}
    140             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    141                          headers=headers)
    142     else:
    143         print('ADDED')  # ADD OFFER
    144         print(new_offer)
    145         headers = {'Content-type': 'application/json'}
    146         requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers, data=json.dumps(new_offer.__dict__,
    147                                                                                                     default=str))
    148 
    149 print('------------------------------------')
    150 
    151 for old_offer in database_offers:
    152     flag = False
    153     for new_offer in new_offers:
    154         if old_offer.offer_shop_code == new_offer.offer_shop_code:
    155             flag = True
    156 
    157     if not flag:
    158         print('OFFER DELETED')
    159         print(old_offer)
    160         # DELETE OFFER
    161         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     171        if not flag:
     172            print('OFFER DELETED')
     173            print(old_offer)
     174            # DELETE OFFER
     175            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     176except Exception:
     177    traceback.print_exc()
     178    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     179                    ' VALUES (%s, %s, %s);'
     180    insert_value = (offer_shop, last_updated, 'failed')
     181    cur.execute(insert_script, insert_value)
     182    db_connection.commit()
     183    cur.close()
     184    db_connection.close()
     185else:
     186    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     187                    ' VALUES (%s, %s, %s);'
     188    insert_value = (offer_shop, last_updated, 'success')
     189    cur.execute(insert_script, insert_value)
     190    db_connection.commit()
     191    cur.close()
     192    db_connection.close()
Note: See TracChangeset for help on using the changeset viewer.