Ignore:
Timestamp:
11/20/22 16:34:52 (20 months ago)
Author:
Marko <Marko@…>
Branches:
master
Parents:
ffd50db
Message:

Final features implemented

File:
1 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/mobigo_scrapper.py

    rffd50db r47f4eaf  
    1 1 import json
     2import traceback
    2 3 import unicodedata
    3 4 from datetime import datetime
     
    18 19 is_validated = False
    19 20
    20 # Mobi Go phone offers that are already in database
    21 
    22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobigo').text))
    23 
    24 database_offers = []
    25 
    26 for offer in offers:
    27     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    28                             offer['ram_memory'],
    29                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    30                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    31                             offer['image_url'],
    32                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    33                             offer['offer_description'],
    34                             offer['offer_shop_code'])
    35     database_offers.append(phoneOffer)
    36 
    37 new_offers = []
    38 
    39 
    40 for i in range(1, 6):
    41     mobigo_url = "https://mobigo.mk/page/" + str(i) + "/"
    42 
    43     response1 = requests.get(mobigo_url)
    44 
    45     soup1 = BeautifulSoup(response1.content, 'html.parser')
    46 
    47     phone_sections = soup1.find_all('ul', {'class': 'recent-posts'})
    48     phones = phone_sections[len(phone_sections) - 1].find_all('li')
    49 
    50     for phone in phones:
    51         offer_url = phone.find('div', {'class', 'post-thumb'}).find('a').get('href')  # offer url
    52         image_url = phone.find('div', {'class', 'post-thumb'}).find('a').find('img').get('src')  # image url
    53         offer_name = phone.find('div', {'class', 'post-content'}).find_all('h2')[0].get_text().strip()  # offer_name
    54 
    55         if "Watch" in offer_name or "Tab" in offer_name:  # if the product is watch or tablet, continue
    56             continue
    57 
    58         price = int(float(phone.find('div', {'class', 'post-content'}).find_all('h2')[1] \
    59                           .get_text().replace('ден.', '').replace('.', '').strip()))  # price
    60 
    61         response2 = requests.get(offer_url)
    62         soup2 = BeautifulSoup(response2.content, 'html.parser')
    63 
    64         brand = soup2.find('a', {'rel': 'category tag'}).get_text().strip()  # brand
    65 
    66         if brand not in offer_name:
    67             offer_name = brand + " " + offer_name
    68 
    69         specifications = soup2.find('table', {'id': 'singlet'}).find_all('tr')
    70 
    71         ram_memory = None
    72         rom_memory = None
    73         battery = None
    74         back_camera = None
    75         front_camera = None
    76         chipset = None
    77         operating_system = None
    78         cpu = None
    79         offer_shop_code = None
    80         offer_description = None
    81         color = None
    82 
    83         for specification in specifications:
    84             if specification.find('td') == None:
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
     30
     31try:
     32    # Mobi Go phone offers that are already in database
     33    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobigo').text))
     34
     35    database_offers = []
     36
     37    for offer in offers:
     38        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     39                                offer['ram_memory'],
     40                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     41                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     42                                offer['image_url'],
     43                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     44                                offer['offer_description'],
     45                                offer['offer_shop_code'])
     46        database_offers.append(phoneOffer)
     47
     48    new_offers = []
     49
     50
     51    for i in range(1, 6):
     52        mobigo_url = "https://mobigo.mk/page/" + str(i) + "/"
     53
     54        response1 = requests.get(mobigo_url)
     55
     56        soup1 = BeautifulSoup(response1.content, 'html.parser')
     57
     58        phone_sections = soup1.find_all('ul', {'class': 'recent-posts'})
     59        phones = phone_sections[len(phone_sections) - 1].find_all('li')
     60
     61        for phone in phones:
     62            offer_url = phone.find('div', {'class', 'post-thumb'}).find('a').get('href')  # offer url
     63            image_url = phone.find('div', {'class', 'post-thumb'}).find('a').find('img').get('src')  # image url
     64            offer_name = phone.find('div', {'class', 'post-content'}).find_all('h2')[0].get_text().strip()  # offer_name
     65
     66            if "Watch" in offer_name or "Tab" in offer_name:  # if the product is watch or tablet, continue
    85 67                continue
    86 68
    87             # operating system
    88             if specification.find('td').get_text() == "Платформа":
    89                 if specification.find('i').get_text() != "/":
    90                     operating_system = specification.find('i').get_text().strip()
    91                 else:
    92                     operating_system = None
    93 
    94             # chipset
    95             if specification.find('td').get_text() == "Chipset":
    96                 if specification.find('i').get_text() != "/":
    97                     chipset = specification.find('i').get_text().strip()
    98                 else:
    99                     chipset = None
    100 
    101             # ram and rom memory
    102             if specification.find('td').get_text() == "Меморија":
    103                 if specification.find('i').get_text() != "/":
    104                     rom_memory = specification.find('i').get_text().replace(',', '').split(' ')[0].strip()
    105                     ram_memory = specification.find('i').get_text().replace(',', '').split(' ')[1].strip()
    106                 else:
    107                     rom_memory = None
    108                     ram_memory = None
    109 
    110             # back camera
    111             if specification.find('td').get_text() == "Главна Камера":
    112                 if specification.find('i').get_text() != "/":
    113                     back_camera = specification.find('i').get_text().strip()
    114                 else:
    115                     back_camera = None
    116 
    117             # front camera
    118             if specification.find('td').get_text() == "Селфи Камера":
    119                 if specification.find('i').get_text() != "/":
    120                     front_camera = specification.find('i').get_text().strip()
    121                 else:
    122                     front_camera = None
    123 
    124             # battery
    125             if specification.find('td').get_text() == "Батерија":
    126                 if specification.find('i').get_text() != "/":
    127                     battery = specification.find('i').get_text().strip()
    128                 else:
    129                     battery = None
    130 
    131         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    132                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    133                                      image_url,
    134                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    135 
    136 
    137 for new_offer in new_offers:
    138     flag = False
    139     flag_price = False
    140     offer_id = None
     69            price = int(float(phone.find('div', {'class', 'post-content'}).find_all('h2')[1] \
     70                              .get_text().replace('ден.', '').replace('.', '').strip()))  # price
     71
     72            response2 = requests.get(offer_url)
     73            soup2 = BeautifulSoup(response2.content, 'html.parser')
     74
     75            brand = soup2.find('a', {'rel': 'category tag'}).get_text().strip()  # brand
     76
     77            if brand not in offer_name:
     78                offer_name = brand + " " + offer_name
     79
     80            specifications = soup2.find('table', {'id': 'singlet'}).find_all('tr')
     81
     82            ram_memory = None
     83            rom_memory = None
     84            battery = None
     85            back_camera = None
     86            front_camera = None
     87            chipset = None
     88            operating_system = None
     89            cpu = None
     90            offer_shop_code = None
     91            offer_description = None
     92            color = None
     93
     94            for specification in specifications:
     95                if specification.find('td') == None:
     96                    continue
     97
     98                # operating system
     99                if specification.find('td').get_text() == "Платформа":
     100                    if specification.find('i').get_text() != "/":
     101                        operating_system = specification.find('i').get_text().strip()
     102                    else:
     103                        operating_system = None
     104
     105                # chipset
     106                if specification.find('td').get_text() == "Chipset":
     107                    if specification.find('i').get_text() != "/":
     108                        chipset = specification.find('i').get_text().strip()
     109                    else:
     110                        chipset = None
     111
     112                # ram and rom memory
     113                if specification.find('td').get_text() == "Меморија":
     114                    if specification.find('i').get_text() != "/":
     115                        rom_memory = specification.find('i').get_text().replace(',', '').split(' ')[0].strip()
     116                        ram_memory = specification.find('i').get_text().replace(',', '').split(' ')[1].strip()
     117                    else:
     118                        rom_memory = None
     119                        ram_memory = None
     120
     121                # back camera
     122                if specification.find('td').get_text() == "Главна Камера":
     123                    if specification.find('i').get_text() != "/":
     124                        back_camera = specification.find('i').get_text().strip()
     125                    else:
     126                        back_camera = None
     127
     128                # front camera
     129                if specification.find('td').get_text() == "Селфи Камера":
     130                    if specification.find('i').get_text() != "/":
     131                        front_camera = specification.find('i').get_text().strip()
     132                    else:
     133                        front_camera = None
     134
     135                # battery
     136                if specification.find('td').get_text() == "Батерија":
     137                    if specification.find('i').get_text() != "/":
     138                        battery = specification.find('i').get_text().strip()
     139                    else:
     140                        battery = None
     141
     142            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     143                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     144                                         image_url,
     145                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     146
     147
     148    for new_offer in new_offers:
     149        flag = False
     150        flag_price = False
     151        offer_id = None
     152
     153        for old_offer in database_offers:
     154
     155            if new_offer.offer_name == old_offer.offer_name:
     156                flag = True
     157                if new_offer.price != old_offer.price:
     158                    flag_price = True
     159                    offer_id = old_offer.offer_id
     160
     161        if flag:
     162            print('ALREADY IN DATABASE')
     163            print(new_offer)
     164            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     165            if flag_price:
     166                print('PRICE CHANGED!')  # CHANGE PRICE
     167                print('offer id: ' + str(offer_id))
     168                headers = {'Content-type': 'application/json'}
     169                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     170                             headers=headers)
     171        else:
     172            print('ADDED')  # ADD OFFER
     173            print(new_offer)
     174            headers = {'Content-type': 'application/json'}
     175            requests.post('http://localhost:8080/phoneoffer/addoffer',
     176                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     177
     178    print('------------------------------------')
    141 179
    142 180    for old_offer in database_offers:
    143 
    144         if new_offer.offer_name == old_offer.offer_name:
    145             flag = True
    146             if new_offer.price != old_offer.price:
    147                 flag_price = True
    148                 offer_id = old_offer.offer_id
    149 
    150     if flag:
    151         print('ALREADY IN DATABASE')
    152         print(new_offer)
    153         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    154         if flag_price:
    155             print('PRICE CHANGED!')  # CHANGE PRICE
    156             print('offer id: ' + str(offer_id))
    157             headers = {'Content-type': 'application/json'}
    158             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    159                          headers=headers)
    160     else:
    161         print('ADDED')  # ADD OFFER
    162         print(new_offer)
    163         headers = {'Content-type': 'application/json'}
    164         requests.post('http://localhost:8080/phoneoffer/addoffer',
    165                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    166 
    167 print('------------------------------------')
    168 
    169 for old_offer in database_offers:
    170     flag = False
    171     for new_offer in new_offers:
    172         if old_offer.offer_name == new_offer.offer_name:
    173             flag = True
    174 
    175     if not flag:
    176         print('OFFER DELETED')
    177         print(old_offer)
    178         # DELETE OFFER
    179         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     181        flag = False
     182        for new_offer in new_offers:
     183            if old_offer.offer_name == new_offer.offer_name:
     184                flag = True
     185
     186        if not flag:
     187            print('OFFER DELETED')
     188            print(old_offer)
     189            # DELETE OFFER
     190            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     191except Exception:
     192    traceback.print_exc()
     193    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     194                    ' VALUES (%s, %s, %s);'
     195    insert_value = (offer_shop, last_updated, 'failed')
     196    cur.execute(insert_script, insert_value)
     197    db_connection.commit()
     198    cur.close()
     199    db_connection.close()
     200else:
     201    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     202                    ' VALUES (%s, %s, %s);'
     203    insert_value = (offer_shop, last_updated, 'success')
     204    cur.execute(insert_script, insert_value)
     205    db_connection.commit()
     206    cur.close()
     207    db_connection.close()
Note: See TracChangeset for help on using the changeset viewer.