Ignore:
Timestamp:
11/20/22 16:34:52 (2 years ago)
Author:
Marko <Marko@…>
Branches:
master
Parents:
ffd50db
Message:

Final features implemented

File:
1 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/neptun_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    1920is_validated = False
    2021
    21 # Neptun phone offers that are already in database
     22# Call to read the configuration file and connect to database
     23cinfo = config_read.get_databaseconfig("../postgresdb.config")
     24db_connection = psycopg2.connect(
     25    database=cinfo[0],
     26    host=cinfo[1],
     27    user=cinfo[2],
     28    password=cinfo[3]
     29)
     30cur = db_connection.cursor()
    2231
    23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/neptun').text))
     32try:
     33    # Neptun phone offers that are already in database
     34    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/neptun').text))
    2435
    25 database_offers = []
     36    database_offers = []
    2637
    27 for offer in offers:
    28     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    29                             offer['ram_memory'],
    30                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    31                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    32                             offer['image_url'],
    33                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    34                             offer['offer_description'],
    35                             offer['offer_shop_code'])
    36     database_offers.append(phoneOffer)
     38    for offer in offers:
     39        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     40                                offer['ram_memory'],
     41                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     42                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     43                                offer['image_url'],
     44                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     45                                offer['offer_description'],
     46                                offer['offer_shop_code'])
     47        database_offers.append(phoneOffer)
    3748
    38 new_offers = []
     49    new_offers = []
    3950
    40 for i in range(1, 11):
    41     neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page=' + str(i)
     51    for i in range(1, 11):
     52        neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page=' + str(i)
    4253
    43     # selenium is used because of the dynamic content of the page
    44     driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
    45     driver1.get(neptun_url)
    46     neptun_html = driver1.page_source
     54        # selenium is used because of the dynamic content of the page
     55        driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
     56        driver1.get(neptun_url)
     57        neptun_html = driver1.page_source
    4758
    48     # closing the driver so the safari instance can pair with another webdriver session
    49     driver1.close()
    50 
    51     # response1 = requests.get(neptun_url)
    52     soup1 = BeautifulSoup(neptun_html, 'html.parser')
    53 
    54     phones = soup1.find('div', {'id': 'mainContainer'}).find('div',
    55                                                              {'class': 'col-lg-9 col-md-9 col-sm-8 col-fix-main'}) \
    56         .find_all('div', {'class': 'ng-scope product-list-item-grid'})
    57 
    58     for phone in phones:
    59         offer_url = 'https://www.neptun.mk' + phone.find('a').get('href')
    60         offer_name = phone.find('a').find('h2').get_text().replace('MOB.TEL.', '').strip()
    61         brand = offer_name.split(' ')[0].strip().capitalize()
    62         image_url = 'https://www.neptun.mk' + phone.find('a').find('div', {'class': 'row'}).find('img').get('src')
    63         price = int(
    64             phone.find('div', {'class': 'col-sm-12 static'}).find('div', {'class': 'product-list-item__prices pt35'})
    65             .find('div', {'class': 'row'}).find('div', {'class': 'newPriceModel'}) \
    66             .find('span', {'class': 'product-price__amount--value ng-binding'}).get_text().replace('.', ''))
    67 
    68         driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
    69         driver1.get(offer_url)
    70         offer_html = driver1.page_source
    7159        # closing the driver so the safari instance can pair with another webdriver session
    7260        driver1.close()
    7361
    74         soup2 = BeautifulSoup(offer_html, 'html.parser')
     62        # response1 = requests.get(neptun_url)
     63        soup1 = BeautifulSoup(neptun_html, 'html.parser')
    7564
    76         offer_shop_code = soup2.find('div', {'ng-if': 'showProductDetails'}) \
    77             .find('div', {'class': 'product-details-first-row'}).find('span', {
    78             'ng-bind': 'model.CodeNumber'}).get_text().strip()
     65        phones = soup1.find('div', {'id': 'mainContainer'}).find('div',
     66                                                                 {'class': 'col-lg-9 col-md-9 col-sm-8 col-fix-main'}) \
     67            .find_all('div', {'class': 'ng-scope product-list-item-grid'})
    7968
    80         specifications_table = \
    81             soup2.find('div', {'id': 'mainContainer'}).find('div', {'ng-if': 'showProductDetails'}).find_all('ul')[-1]
    82         specifications = specifications_table.get_text(separator='\n').strip().split("\n")
     69        for phone in phones:
     70            offer_url = 'https://www.neptun.mk' + phone.find('a').get('href')
     71            offer_name = phone.find('a').find('h2').get_text().replace('MOB.TEL.', '').strip()
     72            brand = offer_name.split(' ')[0].strip().capitalize()
     73            image_url = 'https://www.neptun.mk' + phone.find('a').find('div', {'class': 'row'}).find('img').get('src')
     74            price = int(
     75                phone.find('div', {'class': 'col-sm-12 static'}).find('div', {'class': 'product-list-item__prices pt35'})
     76                .find('div', {'class': 'row'}).find('div', {'class': 'newPriceModel'}) \
     77                .find('span', {'class': 'product-price__amount--value ng-binding'}).get_text().replace('.', ''))
    8378
    84         offer_description = specifications_table.get_text(separator='\n').strip()
     79            driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
     80            driver1.get(offer_url)
     81            offer_html = driver1.page_source
     82            # closing the driver so the safari instance can pair with another webdriver session
     83            driver1.close()
    8584
    86         back_camera = None
    87         operating_system = None
    88         chipset = None
    89         battery = None
    90         ram_memory = None
    91         rom_memory = None
    92         cpu = None
    93         front_camera = None
    94         color = None
     85            soup2 = BeautifulSoup(offer_html, 'html.parser')
    9586
    96         for specification in specifications:
    97             if 'Батерија:' in specification:
    98                 battery = specification.split('Батерија:')[1]
     87            offer_shop_code = soup2.find('div', {'ng-if': 'showProductDetails'}) \
     88                .find('div', {'class': 'product-details-first-row'}).find('span', {
     89                'ng-bind': 'model.CodeNumber'}).get_text().strip()
    9990
    100             if 'CPU:' in specification:
    101                 cpu = specification.split('CPU:')[1]
     91            specifications_table = \
     92                soup2.find('div', {'id': 'mainContainer'}).find('div', {'ng-if': 'showProductDetails'}).find_all('ul')[-1]
     93            specifications = specifications_table.get_text(separator='\n').strip().split("\n")
    10294
    103             if 'Chipset:' in specification:
    104                 chipset = specification.split('Chipset:')[1]
     95            offer_description = specifications_table.get_text(separator='\n').strip()
    10596
    106             if 'RAM Меморија:' in specification:
    107                 ram_memory = specification.split('RAM Меморија:')[1]
    108                 continue
     97            back_camera = None
     98            operating_system = None
     99            chipset = None
     100            battery = None
     101            ram_memory = None
     102            rom_memory = None
     103            cpu = None
     104            front_camera = None
     105            color = None
    109106
    110             if 'ROM Меморија:' in specification:
    111                 rom_memory = specification.split('ROM Меморија:')[1]
    112                 continue
     107            for specification in specifications:
     108                if 'Батерија:' in specification:
     109                    battery = specification.split('Батерија:')[1]
    113110
    114             if 'ROM:' in specification:
    115                 rom_memory = specification.split('ROM:')[1]
     111                if 'CPU:' in specification:
     112                    cpu = specification.split('CPU:')[1]
    116113
    117             if 'RAM:' in specification:
    118                 ram_memory = specification.split('RAM:')[1]
     114                if 'Chipset:' in specification:
     115                    chipset = specification.split('Chipset:')[1]
    119116
    120             if 'iOS' in specification or 'Android' in specification:
    121                 operating_system = specification
     117                if 'RAM Меморија:' in specification:
     118                    ram_memory = specification.split('RAM Меморија:')[1]
     119                    continue
    122120
    123         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    124                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    125                                      image_url,
    126                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     121                if 'ROM Меморија:' in specification:
     122                    rom_memory = specification.split('ROM Меморија:')[1]
     123                    continue
    127124
    128 for new_offer in new_offers:
    129     flag = False
    130     flag_price = False
    131     offer_id = None
     125                if 'ROM:' in specification:
     126                    rom_memory = specification.split('ROM:')[1]
     127
     128                if 'RAM:' in specification:
     129                    ram_memory = specification.split('RAM:')[1]
     130
     131                if 'iOS' in specification or 'Android' in specification:
     132                    operating_system = specification
     133
     134            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     135                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     136                                         image_url,
     137                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     138
     139    for new_offer in new_offers:
     140        flag = False
     141        flag_price = False
     142        offer_id = None
     143
     144        for old_offer in database_offers:
     145
     146            if new_offer.offer_shop_code == old_offer.offer_shop_code:
     147                flag = True
     148                if new_offer.price != old_offer.price:
     149                    flag_price = True
     150                    offer_id = old_offer.offer_id
     151
     152        if flag:
     153            # print('ALREADY IN DATABASE')
     154            # print(new_offer)
     155            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     156            if flag_price:
     157                print('PRICE CHANGED!')  # CHANGE PRICE
     158                print('offer id: ' + str(offer_id))
     159                headers = {'Content-type': 'application/json'}
     160                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     161                             headers=headers)
     162        else:
     163            print('ADDED')  # ADD OFFER
     164            print(new_offer)
     165            headers = {'Content-type': 'application/json'}
     166            requests.post('http://localhost:8080/phoneoffer/addoffer',
     167                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     168
     169    print('------------------------------------')
    132170
    133171    for old_offer in database_offers:
     172        flag = False
     173        for new_offer in new_offers:
     174            if old_offer.offer_shop_code == new_offer.offer_shop_code:
     175                flag = True
    134176
    135         if new_offer.offer_shop_code == old_offer.offer_shop_code:
    136             flag = True
    137             if new_offer.price != old_offer.price:
    138                 flag_price = True
    139                 offer_id = old_offer.offer_id
    140 
    141     if flag:
    142         # print('ALREADY IN DATABASE')
    143         # print(new_offer)
    144         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    145         if flag_price:
    146             print('PRICE CHANGED!')  # CHANGE PRICE
    147             print('offer id: ' + str(offer_id))
    148             headers = {'Content-type': 'application/json'}
    149             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    150                          headers=headers)
    151     else:
    152         print('ADDED')  # ADD OFFER
    153         print(new_offer)
    154         headers = {'Content-type': 'application/json'}
    155         requests.post('http://localhost:8080/phoneoffer/addoffer',
    156                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    157 
    158 print('------------------------------------')
    159 
    160 for old_offer in database_offers:
    161     flag = False
    162     for new_offer in new_offers:
    163         if old_offer.offer_shop_code == new_offer.offer_shop_code:
    164             flag = True
    165 
    166     if not flag:
    167         print('OFFER DELETED')
    168         print(old_offer)
    169         # DELETE OFFER
    170         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     177        if not flag:
     178            print('OFFER DELETED')
     179            print(old_offer)
     180            # DELETE OFFER
     181            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     182except Exception:
     183    traceback.print_exc()
     184    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     185                    ' VALUES (%s, %s, %s);'
     186    insert_value = (offer_shop, last_updated, 'failed')
     187    cur.execute(insert_script, insert_value)
     188    db_connection.commit()
     189    cur.close()
     190    db_connection.close()
     191else:
     192    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     193                    ' VALUES (%s, %s, %s);'
     194    insert_value = (offer_shop, last_updated, 'success')
     195    cur.execute(insert_script, insert_value)
     196    db_connection.commit()
     197    cur.close()
     198    db_connection.close()
Note: See TracChangeset for help on using the changeset viewer.