Ignore:
Timestamp:
11/20/22 16:34:52 (20 months ago)
Author:
Marko <Marko@…>
Branches:
master
Parents:
ffd50db
Message:

Final features implemented

File:
1 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/setec_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    1718is_validated = False
    1819
    19 # Setec phone offers that are already in database
     20# Call to read the configuration file and connect to database
     21cinfo = config_read.get_databaseconfig("../postgresdb.config")
     22db_connection = psycopg2.connect(
     23    database=cinfo[0],
     24    host=cinfo[1],
     25    user=cinfo[2],
     26    password=cinfo[3]
     27)
     28cur = db_connection.cursor()
    2029
    21 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/setec').text))
     30try:
     31    # Setec phone offers that are already in database
     32    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/setec').text))
    2233
    23 database_offers = []
     34    database_offers = []
    2435
    25 for offer in offers:
    26     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    27                             offer['ram_memory'],
    28                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    29                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    30                             offer['image_url'],
    31                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    32                             offer['offer_description'],
    33                             offer['offer_shop_code'])
    34     database_offers.append(phoneOffer)
     36    for offer in offers:
     37        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     38                                offer['ram_memory'],
     39                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     40                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     41                                offer['image_url'],
     42                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     43                                offer['offer_description'],
     44                                offer['offer_shop_code'])
     45        database_offers.append(phoneOffer)
    3546
    36 new_offers = []
     47    new_offers = []
    3748
    38 for i in range(1, 9):
    39     setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' + str(i)
     49    for i in range(1, 9):
     50        setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' + str(i)
    4051
    41     response1 = requests.get(setec_url)
    42     soup1 = BeautifulSoup(response1.content, 'html.parser')
     52        response1 = requests.get(setec_url)
     53        soup1 = BeautifulSoup(response1.content, 'html.parser')
    4354
    44     phones = soup1.find('div', {'id': 'mfilter-content-container'}) \
    45         .find_all('div', {'class': 'col-sm-4 col-xs-6'})
     55        phones = soup1.find('div', {'id': 'mfilter-content-container'}) \
     56            .find_all('div', {'class': 'col-sm-4 col-xs-6'})
    4657
    47     for phone in phones:
    48         offer_url = phone.find('div', {'class': 'left'}).find('a').get('href')
    49         image_url = phone.find('div', {'class': 'left'}).find('a').find('img').get('src')
    50         offer_name = phone.find('div', {'class': 'right'}).find('div', {'class': 'name'}).find('a').get_text().strip()
    51         brand = offer_name.split(' ')[0]
     58        for phone in phones:
     59            offer_url = phone.find('div', {'class': 'left'}).find('a').get('href')
     60            image_url = phone.find('div', {'class': 'left'}).find('a').find('img').get('src')
     61            offer_name = phone.find('div', {'class': 'right'}).find('div', {'class': 'name'}).find('a').get_text().strip()
     62            brand = offer_name.split(' ')[0]
    5263
    53         back_camera = None
    54         operating_system = None
    55         chipset = None
    56         battery = None
    57         ram_memory = None
    58         rom_memory = None
    59         cpu = None
    60         front_camera = None
    61         color = None
     64            back_camera = None
     65            operating_system = None
     66            chipset = None
     67            battery = None
     68            ram_memory = None
     69            rom_memory = None
     70            cpu = None
     71            front_camera = None
     72            color = None
    6273
    63         if 'Cable' in offer_name or 'AirTag' in offer_name:
    64             continue
     74            if 'Cable' in offer_name or 'AirTag' in offer_name:
     75                continue
    6576
    66         if brand not in offer_name:
    67             offer_name = brand + " " + offer_name
     77            if brand not in offer_name:
     78                offer_name = brand + " " + offer_name
    6879
    69         offer_shop_code = phone.find('div', {'class': 'right'}) \
    70             .find('div', {'class': 'shifra'}).get_text().replace('Шифра:', '').strip()
     80            offer_shop_code = phone.find('div', {'class': 'right'}) \
     81                .find('div', {'class': 'shifra'}).get_text().replace('Шифра:', '').strip()
    7182
    72         price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
    73             find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'})
     83            price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
     84                find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'})
    7485
    75         if price_tag is None:
    76             price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
    77                 find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'cena_za_kesh'})
     86            if price_tag is None:
     87                price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
     88                    find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'cena_za_kesh'})
    7889
    79         price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip())
     90            price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip())
    8091
    81         response2 = requests.get(offer_url)
    82         soup2 = BeautifulSoup(response2.content, 'html.parser')
     92            response2 = requests.get(offer_url)
     93            soup2 = BeautifulSoup(response2.content, 'html.parser')
    8394
    84         offer_description = soup2.find('div', {'id': 'tab-description'}).get_text(separator='\n')
     95            offer_description = soup2.find('div', {'id': 'tab-description'}).get_text(separator='\n')
    8596
    86         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    87                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    88                                      image_url,
    89                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     97            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     98                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     99                                         image_url,
     100                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    90101
    91 for new_offer in new_offers:
    92     flag = False
    93     flag_price = False
    94     offer_id = None
     102    for new_offer in new_offers:
     103        flag = False
     104        flag_price = False
     105        offer_id = None
     106
     107        for old_offer in database_offers:
     108
     109            if new_offer.offer_shop_code == old_offer.offer_shop_code:
     110                flag = True
     111                if new_offer.price != old_offer.price:
     112                    flag_price = True
     113                    offer_id = old_offer.offer_id
     114
     115        if flag:
     116            # print('ALREADY IN DATABASE')
     117            # print(new_offer)
     118            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     119            if flag_price:
     120                print('PRICE CHANGED!')  # CHANGE PRICE
     121                print('offer id: ' + str(offer_id))
     122                headers = {'Content-type': 'application/json'}
     123                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     124                             headers=headers)
     125        else:
     126            print('ADDED')  # ADD OFFER
     127            print(new_offer)
     128            headers = {'Content-type': 'application/json'}
     129            requests.post('http://localhost:8080/phoneoffer/addoffer',
     130                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     131
     132    print('------------------------------------')
    95133
    96134    for old_offer in database_offers:
     135        flag = False
     136        for new_offer in new_offers:
     137            if old_offer.offer_shop_code == new_offer.offer_shop_code:
     138                flag = True
    97139
    98         if new_offer.offer_shop_code == old_offer.offer_shop_code:
    99             flag = True
    100             if new_offer.price != old_offer.price:
    101                 flag_price = True
    102                 offer_id = old_offer.offer_id
    103 
    104     if flag:
    105         # print('ALREADY IN DATABASE')
    106         # print(new_offer)
    107         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    108         if flag_price:
    109             print('PRICE CHANGED!')  # CHANGE PRICE
    110             print('offer id: ' + str(offer_id))
    111             headers = {'Content-type': 'application/json'}
    112             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    113                          headers=headers)
    114     else:
    115         print('ADDED')  # ADD OFFER
    116         print(new_offer)
    117         headers = {'Content-type': 'application/json'}
    118         requests.post('http://localhost:8080/phoneoffer/addoffer',
    119                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    120 
    121 print('------------------------------------')
    122 
    123 for old_offer in database_offers:
    124     flag = False
    125     for new_offer in new_offers:
    126         if old_offer.offer_shop_code == new_offer.offer_shop_code:
    127             flag = True
    128 
    129     if not flag:
    130         print('OFFER DELETED')
    131         print(old_offer)
    132         # DELETE OFFER
    133         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     140        if not flag:
     141            print('OFFER DELETED')
     142            print(old_offer)
     143            # DELETE OFFER
     144            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     145except Exception:
     146    traceback.print_exc()
     147    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     148                    ' VALUES (%s, %s, %s);'
     149    insert_value = (offer_shop, last_updated, 'failed')
     150    cur.execute(insert_script, insert_value)
     151    db_connection.commit()
     152    cur.close()
     153    db_connection.close()
     154else:
     155    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     156                    ' VALUES (%s, %s, %s);'
     157    insert_value = (offer_shop, last_updated, 'success')
     158    cur.execute(insert_script, insert_value)
     159    db_connection.commit()
     160    cur.close()
     161    db_connection.close()
Note: See TracChangeset for help on using the changeset viewer.