Ignore:
Timestamp:
11/20/22 16:34:52 (20 months ago)
Author:
Marko <Marko@…>
Branches:
master
Parents:
ffd50db
Message:

Final features implemented

File:
1 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/akcija_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23from datetime import datetime
    34
     
    1819is_validated = False
    1920
    20 # Akcija phone offers that are already in database
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
    2130
    22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text))
     31try:
     32    # Akcija phone offers that are already in database
     33    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text))
    2334
    24 database_offers = []
     35    database_offers = []
    2536
    26 for offer in offers:
    27     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    28                             offer['ram_memory'],
    29                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    30                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    31                             offer['image_url'],
    32                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    33                             offer['offer_description'],
    34                             offer['offer_shop_code'])
    35     database_offers.append(phoneOffer)
     37    for offer in offers:
     38        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     39                                offer['ram_memory'],
     40                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     41                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     42                                offer['image_url'],
     43                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     44                                offer['offer_description'],
     45                                offer['offer_shop_code'])
     46        database_offers.append(phoneOffer)
    3647
    37 new_offers = []
     48    new_offers = []
    3849
    39 i = 0
    40 while i <= 20:
    41     akcija_url = "https://akcija.com.mk/listing/" + str(i) + "?category=mobilnitelefoni"
    42     response1 = requests.get(akcija_url)
    43     response1.encoding = 'utf-8'
    44     soup1 = BeautifulSoup(response1.text, 'html.parser')
     50    i = 0
     51    while i <= 20:
     52        akcija_url = "https://akcija.com.mk/listing/" + str(i) + "?category=mobilnitelefoni"
     53        response1 = requests.get(akcija_url)
     54        response1.encoding = 'utf-8'
     55        soup1 = BeautifulSoup(response1.text, 'html.parser')
    4556
    46     phones = soup1.find_all('div', {'class', 'product-item__body pb-xl-2'})
     57        phones = soup1.find_all('div', {'class', 'product-item__body pb-xl-2'})
    4758
    48     for phone in phones:
    49         offer_name = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a') \
    50             .get_text().replace('Паметен телефон', '').strip()
    51         brand = offer_name.split(' ')[0]
     59        for phone in phones:
     60            offer_name = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a') \
     61                .get_text().replace('Паметен телефон', '').strip()
     62            brand = offer_name.split(' ')[0]
    5263
    53         if brand not in offer_name:
    54             offer_name = brand + " " + offer_name
     64            if brand not in offer_name:
     65                offer_name = brand + " " + offer_name
    5566
    56         offer_url = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a').get('href')
    57         image_url = phone.find('div', {'class', 'mb-2'}).find('img').get('src')
    58         price = int(phone.find('div', {'class', 'flex-center-between mb-1 pt-xl-2'}) \
    59                     .find('ins').get_text().split(' ')[0].strip())
     67            offer_url = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a').get('href')
     68            image_url = phone.find('div', {'class', 'mb-2'}).find('img').get('src')
     69            price = int(phone.find('div', {'class', 'flex-center-between mb-1 pt-xl-2'}) \
     70                        .find('ins').get_text().split(' ')[0].strip())
    6071
    61         response2 = requests.get(offer_url)
    62         response2.encoding = 'utf-8'
    63         soup2 = BeautifulSoup(response2.text, 'html.parser')
     72            response2 = requests.get(offer_url)
     73            response2.encoding = 'utf-8'
     74            soup2 = BeautifulSoup(response2.text, 'html.parser')
    6475
    65         back_camera = None
    66         operating_system = None
    67         chipset = None
    68         battery = None
    69         ram_memory = None
    70         rom_memory = None
    71         cpu = None
    72         front_camera = None
    73         color = None
    74         offer_shop_code = None
     76            back_camera = None
     77            operating_system = None
     78            chipset = None
     79            battery = None
     80            ram_memory = None
     81            rom_memory = None
     82            cpu = None
     83            front_camera = None
     84            color = None
     85            offer_shop_code = None
    7586
    76         specifications = soup2.find('main', {'id': 'content'}) \
    77             .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \
    78             .find('div', {'class', 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}).find_all('p')
     87            specifications = soup2.find('main', {'id': 'content'}) \
     88                .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \
     89                .find('div', {'class', 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}).find_all('p')
    7990
    80         offer_description = ''
    81         for specification in specifications:
    82             if 'Код за нарачка' in str(specification.get_text(separator='\n').replace('NBSP', '').strip()):
    83                 continue
    84             offer_description += unicodedata.normalize('NFKD',
    85                                                        str(specification.get_text(separator='\n').strip())) + "\n"
     91            offer_description = ''
     92            for specification in specifications:
     93                if 'Код за нарачка' in str(specification.get_text(separator='\n').replace('NBSP', '').strip()):
     94                    continue
     95                offer_description += unicodedata.normalize('NFKD',
     96                                                           str(specification.get_text(separator='\n').strip())) + "\n"
    8697
    87         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    88                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    89                                      image_url,
    90                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    91     i += 20
     98            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     99                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     100                                         image_url,
     101                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     102        i += 20
    92103
    93 for new_offer in new_offers:
    94     flag = False
    95     flag_price = False
    96     offer_id = None
     104    for new_offer in new_offers:
     105        flag = False
     106        flag_price = False
     107        offer_id = None
     108
     109        for old_offer in database_offers:
     110
     111            if new_offer.offer_name == old_offer.offer_name:
     112                flag = True
     113                if new_offer.price != old_offer.price:
     114                    flag_price = True
     115                    offer_id = old_offer.offer_id
     116
     117        if flag:
     118            # print('ALREADY IN DATABASE')
     119            # print(new_offer)
     120            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     121            if flag_price:
     122                print('PRICE CHANGED!')  # CHANGE PRICE
     123                print('offer id: ' + str(offer_id))
     124                headers = {'Content-type': 'application/json'}
     125                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     126                             headers=headers)
     127        else:
     128            print('ADDED')  # ADD OFFER
     129            print(new_offer)
     130            headers = {'Content-type': 'application/json'}
     131            requests.post('http://localhost:8080/phoneoffer/addoffer',
     132                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     133
     134    print('------------------------------------')
    97135
    98136    for old_offer in database_offers:
     137        flag = False
     138        for new_offer in new_offers:
     139            if old_offer.offer_name == new_offer.offer_name:
     140                flag = True
    99141
    100         if new_offer.offer_name == old_offer.offer_name:
    101             flag = True
    102             if new_offer.price != old_offer.price:
    103                 flag_price = True
    104                 offer_id = old_offer.offer_id
     142        if not flag:
     143            print('OFFER DELETED')
     144            print(old_offer)
     145            # DELETE OFFER
     146            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     147except Exception:
     148    traceback.print_exc()
     149    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     150                    ' VALUES (%s, %s, %s);'
     151    insert_value = (offer_shop, last_updated, 'failed')
     152    cur.execute(insert_script, insert_value)
     153    db_connection.commit()
     154    cur.close()
     155    db_connection.close()
     156else:
     157    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     158                    ' VALUES (%s, %s, %s);'
     159    insert_value = (offer_shop, last_updated, 'success')
     160    cur.execute(insert_script, insert_value)
     161    db_connection.commit()
     162    cur.close()
     163    db_connection.close()
    105164
    106     if flag:
    107         # print('ALREADY IN DATABASE')
    108         # print(new_offer)
    109         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    110         if flag_price:
    111             print('PRICE CHANGED!')  # CHANGE PRICE
    112             print('offer id: ' + str(offer_id))
    113             headers = {'Content-type': 'application/json'}
    114             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    115                          headers=headers)
    116     else:
    117         print('ADDED')  # ADD OFFER
    118         print(new_offer)
    119         headers = {'Content-type': 'application/json'}
    120         requests.post('http://localhost:8080/phoneoffer/addoffer',
    121                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    122 
    123 print('------------------------------------')
    124 
    125 for old_offer in database_offers:
    126     flag = False
    127     for new_offer in new_offers:
    128         if old_offer.offer_name == new_offer.offer_name:
    129             flag = True
    130 
    131     if not flag:
    132         print('OFFER DELETED')
    133         print(old_offer)
    134         # DELETE OFFER
    135         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
Note: See TracChangeset for help on using the changeset viewer.