Ignore:
Timestamp:
11/20/22 16:34:52 (2 years ago)
Author:
Marko <Marko@…>
Branches:
master
Parents:
ffd50db
Message:

Final features implemented

Location:
phonelux_scrappers/scrappers
Files:
12 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/a1_scrapper.py

    rffd50db r47f4eaf  
     1import traceback
    12import unicodedata
    23from datetime import datetime
     
    1819is_validated = False
    1920
    20 # A1 phone offers that are already in database
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
    2130
    22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text))
     31try:
     32    # A1 phone offers that are already in database
     33    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/a1').text))
    2334
    24 database_offers = []
     35    database_offers = []
    2536
    26 for offer in offers:
    27     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    28                             offer['ram_memory'],
    29                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    30                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    31                             offer['image_url'],
    32                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    33                             offer['offer_description'],
    34                             offer['offer_shop_code'])
    35     database_offers.append(phoneOffer)
     37    for offer in offers:
     38        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     39                                offer['ram_memory'],
     40                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     41                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     42                                offer['image_url'],
     43                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     44                                offer['offer_description'],
     45                                offer['offer_shop_code'])
     46        database_offers.append(phoneOffer)
    3647
    37 a1_url = 'https://www.a1.mk/webshop/mk/phones'
     48    a1_url = 'https://www.a1.mk/webshop/mk/phones'
    3849
    39 response1 = requests.get(a1_url)
    40 soup1 = BeautifulSoup(response1.content, 'html.parser')
     50    response1 = requests.get(a1_url)
     51    soup1 = BeautifulSoup(response1.content, 'html.parser')
    4152
    42 phones = soup1.find('main', {'class', 'gsm-advisor-grid phones'}).find('div', {'class', 'd-flex'}) \
    43     .find_all('div', {'class', 'dvc-idtfr by4'})
     53    phones = soup1.find('main', {'class', 'gsm-advisor-grid phones'}).find('div', {'class', 'd-flex'}) \
     54        .find_all('div', {'class', 'dvc-idtfr by4'})
    4455
    45 new_offers = []
     56    new_offers = []
    4657
    47 for phone in phones:
    48     brand = phone.get('data-brand').strip()
    49     offer_name = brand + " " + phone.get('data-model').strip()
     58    for phone in phones:
     59        brand = phone.get('data-brand').strip()
     60        offer_name = brand + " " + phone.get('data-model').strip()
    5061
    51     # if brand not in offer_name:
    52     #     offer_name = brand+" "+offer_name
     62        # if brand not in offer_name:
     63        #     offer_name = brand+" "+offer_name
    5364
    54     offer_shop_code = phone.get('data-productid').strip()
    55     offer_url = phone.find('a', {'class', 'device-link'}).get('href')
    56     image_url = phone.get('data-image')
     65        offer_shop_code = phone.get('data-productid').strip()
     66        offer_url = phone.find('a', {'class', 'device-link'}).get('href')
     67        image_url = phone.get('data-image')
    5768
    58     response2 = requests.get(offer_url)
    59     soup2 = BeautifulSoup(response2.content, 'html.parser')
     69        response2 = requests.get(offer_url)
     70        soup2 = BeautifulSoup(response2.content, 'html.parser')
    6071
    61     temp_prices = soup2.find('div', {'class': 'ured-tabs-content'}) \
    62         .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div')
     72        temp_prices = soup2.find('div', {'class': 'ured-tabs-content'}) \
     73            .find('div', {'class': 'cenovnik-secondary d-flex justify-content-between'}).find_all('div')
    6374
    64     # offer price
    65     price = None
    66     for temp_price in temp_prices:
    67         if 'Цена само за уред' in temp_price.get_text().strip():
    68             price = int(temp_price.get_text().replace('Цена само за уред', '')
    69                         .replace('Одбери', '').replace('денари', '').replace('.', '').strip())
     75        # offer price
     76        price = None
     77        for temp_price in temp_prices:
     78            if 'Цена само за уред' in temp_price.get_text().strip():
     79                price = int(temp_price.get_text().replace('Цена само за уред', '')
     80                            .replace('Одбери', '').replace('денари', '').replace('.', '').strip())
    7081
    71     colors_section = soup2.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label')
     82        colors_section = soup2.find('div', {'id': 'hero'}).find('div', {'class': 'widget'}).find_all('label')
    7283
    73     temp_colors = []
    74     for color_section in colors_section:
    75         temp_colors.append(color_section.get('data-content'))
     84        temp_colors = []
     85        for color_section in colors_section:
     86            temp_colors.append(color_section.get('data-content'))
    7687
    77     color = ','.join(temp_colors)  # colors available for the offer
     88        color = ','.join(temp_colors)  # colors available for the offer
    7889
    79     phone_description = soup2.find('div', {'class': 'desc section'}).find('p').get_text().strip()
     90        phone_description = soup2.find('div', {'class': 'desc section'}).find('p').get_text().strip()
    8091
    81     table_rows = soup2.find('table', {'class': 'table karakteristiki'}).find_all('tr')
     92        table_rows = soup2.find('table', {'class': 'table karakteristiki'}).find_all('tr')
    8293
    83     back_camera = None
    84     operating_system = None
    85     cpu = None
    86     rom_memory = None
    87     ram_memory = None
    88     battery = None
    89     front_camera = None
    90     chipset = None
    91     offer_description = None
     94        back_camera = None
     95        operating_system = None
     96        cpu = None
     97        rom_memory = None
     98        ram_memory = None
     99        battery = None
     100        front_camera = None
     101        chipset = None
     102        offer_description = None
    92103
    93     for row in table_rows:
    94         if 'Камера' in row.get_text().strip():
    95             back_camera = row.get_text().replace('Камера', '').strip()
     104        for row in table_rows:
     105            if 'Камера' in row.get_text().strip():
     106                back_camera = row.get_text().replace('Камера', '').strip()
    96107
    97         if 'Оперативен систем' in row.get_text().strip():
    98             operating_system = row.get_text().replace('Оперативен систем', '').strip()
     108            if 'Оперативен систем' in row.get_text().strip():
     109                operating_system = row.get_text().replace('Оперативен систем', '').strip()
    99110
    100         if 'CPU' in row.get_text().strip():
    101             cpu = row.get_text().replace('CPU', '').strip()
     111            if 'CPU' in row.get_text().strip():
     112                cpu = row.get_text().replace('CPU', '').strip()
    102113
    103         if 'Вградена меморија' in row.get_text().strip():
    104             rom_memory = row.get_text().replace('Вградена меморија', '').strip()
     114            if 'Вградена меморија' in row.get_text().strip():
     115                rom_memory = row.get_text().replace('Вградена меморија', '').strip()
    105116
    106         if 'RAM меморија' in row.get_text().strip():
    107             ram_memory = row.get_text().replace('RAM меморија', '').strip()
     117            if 'RAM меморија' in row.get_text().strip():
     118                ram_memory = row.get_text().replace('RAM меморија', '').strip()
    108119
    109         if 'Батерија' in row.get_text().strip():
    110             battery = row.get_text().replace('Батерија', '').strip()
     120            if 'Батерија' in row.get_text().strip():
     121                battery = row.get_text().replace('Батерија', '').strip()
    111122
    112         if 'Предна камера' in row.get_text().strip():
    113             front_camera = row.get_text().replace('Предна камера', '').strip()
     123            if 'Предна камера' in row.get_text().strip():
     124                front_camera = row.get_text().replace('Предна камера', '').strip()
    114125
    115     new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    116                                  color, front_camera, back_camera, chipset, battery, operating_system, cpu, image_url,
    117                                  offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     126        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     127                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     128                                     image_url,
     129                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    118130
    119 for new_offer in new_offers:
    120     flag = False
    121     flag_price = False
    122     offer_id = None
     131    for new_offer in new_offers:
     132        flag = False
     133        flag_price = False
     134        offer_id = None
     135
     136        for old_offer in database_offers:
     137
     138            if new_offer.offer_shop_code == old_offer.offer_shop_code:
     139                flag = True
     140                if new_offer.price != old_offer.price:
     141                    flag_price = True
     142                    offer_id = old_offer.offer_id
     143
     144        if flag:
     145            # print('ALREADY IN DATABASE')
     146            # print(new_offer)
     147            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     148            if flag_price:
     149                print('PRICE CHANGED!')  # CHANGE PRICE
     150                print('offer id: ' + str(offer_id))
     151                headers = {'Content-type': 'application/json'}
     152                requests.put(
     153                    'http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     154                    headers=headers)
     155        else:
     156            print('ADDED')  # ADD OFFER
     157            print(new_offer)
     158            headers = {'Content-type': 'application/json'}
     159            requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers,
     160                          data=json.dumps(new_offer.__dict__,
     161                                          default=str))
     162
     163    print('------------------------------------')
    123164
    124165    for old_offer in database_offers:
     166        flag = False
     167        for new_offer in new_offers:
     168            if old_offer.offer_shop_code == new_offer.offer_shop_code:
     169                flag = True
    125170
    126         if new_offer.offer_shop_code == old_offer.offer_shop_code:
    127             flag = True
    128             if new_offer.price != old_offer.price:
    129                 flag_price = True
    130                 offer_id = old_offer.offer_id
    131 
    132     if flag:
    133         # print('ALREADY IN DATABASE')
    134         # print(new_offer)
    135         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    136         if flag_price:
    137             print('PRICE CHANGED!')  # CHANGE PRICE
    138             print('offer id: ' + str(offer_id))
    139             headers = {'Content-type': 'application/json'}
    140             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    141                          headers=headers)
    142     else:
    143         print('ADDED')  # ADD OFFER
    144         print(new_offer)
    145         headers = {'Content-type': 'application/json'}
    146         requests.post('http://localhost:8080/phoneoffer/addoffer', headers=headers, data=json.dumps(new_offer.__dict__,
    147                                                                                                     default=str))
    148 
    149 print('------------------------------------')
    150 
    151 for old_offer in database_offers:
    152     flag = False
    153     for new_offer in new_offers:
    154         if old_offer.offer_shop_code == new_offer.offer_shop_code:
    155             flag = True
    156 
    157     if not flag:
    158         print('OFFER DELETED')
    159         print(old_offer)
    160         # DELETE OFFER
    161         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     171        if not flag:
     172            print('OFFER DELETED')
     173            print(old_offer)
     174            # DELETE OFFER
     175            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     176except Exception:
     177    traceback.print_exc()
     178    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     179                    ' VALUES (%s, %s, %s);'
     180    insert_value = (offer_shop, last_updated, 'failed')
     181    cur.execute(insert_script, insert_value)
     182    db_connection.commit()
     183    cur.close()
     184    db_connection.close()
     185else:
     186    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     187                    ' VALUES (%s, %s, %s);'
     188    insert_value = (offer_shop, last_updated, 'success')
     189    cur.execute(insert_script, insert_value)
     190    db_connection.commit()
     191    cur.close()
     192    db_connection.close()
  • phonelux_scrappers/scrappers/akcija_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23from datetime import datetime
    34
     
    1819is_validated = False
    1920
    20 # Akcija phone offers that are already in database
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
    2130
    22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text))
     31try:
     32    # Akcija phone offers that are already in database
     33    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/akcija').text))
    2334
    24 database_offers = []
     35    database_offers = []
    2536
    26 for offer in offers:
    27     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    28                             offer['ram_memory'],
    29                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    30                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    31                             offer['image_url'],
    32                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    33                             offer['offer_description'],
    34                             offer['offer_shop_code'])
    35     database_offers.append(phoneOffer)
     37    for offer in offers:
     38        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     39                                offer['ram_memory'],
     40                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     41                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     42                                offer['image_url'],
     43                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     44                                offer['offer_description'],
     45                                offer['offer_shop_code'])
     46        database_offers.append(phoneOffer)
    3647
    37 new_offers = []
     48    new_offers = []
    3849
    39 i = 0
    40 while i <= 20:
    41     akcija_url = "https://akcija.com.mk/listing/" + str(i) + "?category=mobilnitelefoni"
    42     response1 = requests.get(akcija_url)
    43     response1.encoding = 'utf-8'
    44     soup1 = BeautifulSoup(response1.text, 'html.parser')
     50    i = 0
     51    while i <= 20:
     52        akcija_url = "https://akcija.com.mk/listing/" + str(i) + "?category=mobilnitelefoni"
     53        response1 = requests.get(akcija_url)
     54        response1.encoding = 'utf-8'
     55        soup1 = BeautifulSoup(response1.text, 'html.parser')
    4556
    46     phones = soup1.find_all('div', {'class', 'product-item__body pb-xl-2'})
     57        phones = soup1.find_all('div', {'class', 'product-item__body pb-xl-2'})
    4758
    48     for phone in phones:
    49         offer_name = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a') \
    50             .get_text().replace('Паметен телефон', '').strip()
    51         brand = offer_name.split(' ')[0]
     59        for phone in phones:
     60            offer_name = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a') \
     61                .get_text().replace('Паметен телефон', '').strip()
     62            brand = offer_name.split(' ')[0]
    5263
    53         if brand not in offer_name:
    54             offer_name = brand + " " + offer_name
     64            if brand not in offer_name:
     65                offer_name = brand + " " + offer_name
    5566
    56         offer_url = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a').get('href')
    57         image_url = phone.find('div', {'class', 'mb-2'}).find('img').get('src')
    58         price = int(phone.find('div', {'class', 'flex-center-between mb-1 pt-xl-2'}) \
    59                     .find('ins').get_text().split(' ')[0].strip())
     67            offer_url = phone.find('h5', {'class': 'mb-1 product-item__title'}).find('a').get('href')
     68            image_url = phone.find('div', {'class', 'mb-2'}).find('img').get('src')
     69            price = int(phone.find('div', {'class', 'flex-center-between mb-1 pt-xl-2'}) \
     70                        .find('ins').get_text().split(' ')[0].strip())
    6071
    61         response2 = requests.get(offer_url)
    62         response2.encoding = 'utf-8'
    63         soup2 = BeautifulSoup(response2.text, 'html.parser')
     72            response2 = requests.get(offer_url)
     73            response2.encoding = 'utf-8'
     74            soup2 = BeautifulSoup(response2.text, 'html.parser')
    6475
    65         back_camera = None
    66         operating_system = None
    67         chipset = None
    68         battery = None
    69         ram_memory = None
    70         rom_memory = None
    71         cpu = None
    72         front_camera = None
    73         color = None
    74         offer_shop_code = None
     76            back_camera = None
     77            operating_system = None
     78            chipset = None
     79            battery = None
     80            ram_memory = None
     81            rom_memory = None
     82            cpu = None
     83            front_camera = None
     84            color = None
     85            offer_shop_code = None
    7586
    76         specifications = soup2.find('main', {'id': 'content'}) \
    77             .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \
    78             .find('div', {'class', 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}).find_all('p')
     87            specifications = soup2.find('main', {'id': 'content'}) \
     88                .find_all('div', {'class', 'container'})[1].find('div', {'class', 'mb-14'}) \
     89                .find('div', {'class', 'col-md-6 col-lg-4 col-xl-4 mb-md-6 mb-lg-0'}).find_all('p')
    7990
    80         offer_description = ''
    81         for specification in specifications:
    82             if 'Код за нарачка' in str(specification.get_text(separator='\n').replace('NBSP', '').strip()):
    83                 continue
    84             offer_description += unicodedata.normalize('NFKD',
    85                                                        str(specification.get_text(separator='\n').strip())) + "\n"
     91            offer_description = ''
     92            for specification in specifications:
     93                if 'Код за нарачка' in str(specification.get_text(separator='\n').replace('NBSP', '').strip()):
     94                    continue
     95                offer_description += unicodedata.normalize('NFKD',
     96                                                           str(specification.get_text(separator='\n').strip())) + "\n"
    8697
    87         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    88                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    89                                      image_url,
    90                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    91     i += 20
     98            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     99                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     100                                         image_url,
     101                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     102        i += 20
    92103
    93 for new_offer in new_offers:
    94     flag = False
    95     flag_price = False
    96     offer_id = None
     104    for new_offer in new_offers:
     105        flag = False
     106        flag_price = False
     107        offer_id = None
     108
     109        for old_offer in database_offers:
     110
     111            if new_offer.offer_name == old_offer.offer_name:
     112                flag = True
     113                if new_offer.price != old_offer.price:
     114                    flag_price = True
     115                    offer_id = old_offer.offer_id
     116
     117        if flag:
     118            # print('ALREADY IN DATABASE')
     119            # print(new_offer)
     120            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     121            if flag_price:
     122                print('PRICE CHANGED!')  # CHANGE PRICE
     123                print('offer id: ' + str(offer_id))
     124                headers = {'Content-type': 'application/json'}
     125                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     126                             headers=headers)
     127        else:
     128            print('ADDED')  # ADD OFFER
     129            print(new_offer)
     130            headers = {'Content-type': 'application/json'}
     131            requests.post('http://localhost:8080/phoneoffer/addoffer',
     132                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     133
     134    print('------------------------------------')
    97135
    98136    for old_offer in database_offers:
     137        flag = False
     138        for new_offer in new_offers:
     139            if old_offer.offer_name == new_offer.offer_name:
     140                flag = True
    99141
    100         if new_offer.offer_name == old_offer.offer_name:
    101             flag = True
    102             if new_offer.price != old_offer.price:
    103                 flag_price = True
    104                 offer_id = old_offer.offer_id
     142        if not flag:
     143            print('OFFER DELETED')
     144            print(old_offer)
     145            # DELETE OFFER
     146            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     147except Exception:
     148    traceback.print_exc()
     149    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     150                    ' VALUES (%s, %s, %s);'
     151    insert_value = (offer_shop, last_updated, 'failed')
     152    cur.execute(insert_script, insert_value)
     153    db_connection.commit()
     154    cur.close()
     155    db_connection.close()
     156else:
     157    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     158                    ' VALUES (%s, %s, %s);'
     159    insert_value = (offer_shop, last_updated, 'success')
     160    cur.execute(insert_script, insert_value)
     161    db_connection.commit()
     162    cur.close()
     163    db_connection.close()
    105164
    106     if flag:
    107         # print('ALREADY IN DATABASE')
    108         # print(new_offer)
    109         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    110         if flag_price:
    111             print('PRICE CHANGED!')  # CHANGE PRICE
    112             print('offer id: ' + str(offer_id))
    113             headers = {'Content-type': 'application/json'}
    114             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    115                          headers=headers)
    116     else:
    117         print('ADDED')  # ADD OFFER
    118         print(new_offer)
    119         headers = {'Content-type': 'application/json'}
    120         requests.post('http://localhost:8080/phoneoffer/addoffer',
    121                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    122 
    123 print('------------------------------------')
    124 
    125 for old_offer in database_offers:
    126     flag = False
    127     for new_offer in new_offers:
    128         if old_offer.offer_name == new_offer.offer_name:
    129             flag = True
    130 
    131     if not flag:
    132         print('OFFER DELETED')
    133         print(old_offer)
    134         # DELETE OFFER
    135         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
  • phonelux_scrappers/scrappers/handy_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    2021is_validated = False
    2122
    22 # Handy phone offers that are already in database
    23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/handy').text))
     23# Call to read the configuration file and connect to database
     24cinfo = config_read.get_databaseconfig("../postgresdb.config")
     25db_connection = psycopg2.connect(
     26    database=cinfo[0],
     27    host=cinfo[1],
     28    user=cinfo[2],
     29    password=cinfo[3]
     30)
     31cur = db_connection.cursor()
    2432
    25 database_offers = []
     33try:
     34    # Handy phone offers that are already in database
     35    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/handy').text))
    2636
    27 for offer in offers:
    28     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    29                             offer['ram_memory'],
    30                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    31                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    32                             offer['image_url'],
    33                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    34                             offer['offer_description'],
    35                             offer['offer_shop_code'])
    36     database_offers.append(phoneOffer)
     37    database_offers = []
    3738
    38 new_offers = []
     39    for offer in offers:
     40        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     41                                offer['ram_memory'],
     42                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     43                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     44                                offer['image_url'],
     45                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     46                                offer['offer_description'],
     47                                offer['offer_shop_code'])
     48        database_offers.append(phoneOffer)
    3949
    40 handy_url = 'https://www.handy.mk/telefoni?page=6'
     50    new_offers = []
    4151
    42 response1 = requests.get(handy_url)
    43 soup1 = BeautifulSoup(response1.content, 'html.parser')
     52    handy_url = 'https://www.handy.mk/telefoni?page=6'
    4453
    45 phones = soup1.find_all('li', {'data-hook': 'product-list-grid-item'})
     54    response1 = requests.get(handy_url)
     55    soup1 = BeautifulSoup(response1.content, 'html.parser')
    4656
    47 for phone in phones:
    48     offer_url = phone.find('a').get('href')
    49     offer_name = phone.find('div', {'data-hook': 'not-image-container'})\
    50         .find('h3', {'data-hook': 'product-item-name'}).get_text().strip()
    51     brand = offer_name.split(' ')[0].capitalize()
    52     price = int(float(phone.find('div', {'data-hook': 'not-image-container'}).find('div', {'data-hook': "product-item-product-details"})\
    53         .find('span', {'data-hook': 'product-item-price-to-pay'}).get_text().strip().replace('ден', '').replace('.', '').replace(',', '.')))
     57    phones = soup1.find_all('li', {'data-hook': 'product-list-grid-item'})
    5458
    55     response2 = requests.get(offer_url)
    56     soup2 = BeautifulSoup(response2.text, 'html.parser')
     59    for phone in phones:
     60        offer_url = phone.find('a').get('href')
     61        offer_name = phone.find('div', {'data-hook': 'not-image-container'})\
     62            .find('h3', {'data-hook': 'product-item-name'}).get_text().strip()
     63        brand = offer_name.split(' ')[0].capitalize()
     64        price = int(float(phone.find('div', {'data-hook': 'not-image-container'}).find('div', {'data-hook': "product-item-product-details"})\
     65            .find('span', {'data-hook': 'product-item-price-to-pay'}).get_text().strip().replace('ден', '').replace('.', '').replace(',', '.')))
    5766
    58     back_camera = None
    59     operating_system = None
    60     chipset = None
    61     battery = None
    62     ram_memory = None
    63     rom_memory = None
    64     cpu = None
    65     front_camera = None
    66     offer_shop_code = None
    67     color = None
    68     image_url = None
     67        response2 = requests.get(offer_url)
     68        soup2 = BeautifulSoup(response2.text, 'html.parser')
    6969
    70     color_section = soup2.find('section', {'data-hook': 'product-colors-title-section'})
    71     if color_section is not None:
    72         temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase3548966286__container'})\
    73             .find_all('input', {'type': 'radio'})
    74         colors_list = []
    75         for temp_color in temp_colors:
    76             colors_list.append(temp_color.get('aria-label'))
    77         color = ','.join(colors_list)
     70        back_camera = None
     71        operating_system = None
     72        chipset = None
     73        battery = None
     74        ram_memory = None
     75        rom_memory = None
     76        cpu = None
     77        front_camera = None
     78        offer_shop_code = None
     79        color = None
     80        image_url = None
    7881
    79     rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('li')
     82        color_section = soup2.find('section', {'data-hook': 'product-colors-title-section'})
     83        if color_section is not None:
     84            temp_colors = color_section.find('fieldset', {'class': 'ColorPickerbase3548966286__container'})\
     85                .find_all('input', {'type': 'radio'})
     86            colors_list = []
     87            for temp_color in temp_colors:
     88                colors_list.append(temp_color.get('aria-label'))
     89            color = ','.join(colors_list)
    8090
    81     if len(rows) == 0:
    82         rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('tr')
     91        rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('li')
    8392
    84     specifications = []
     93        if len(rows) == 0:
     94            rows = soup2.find('div', {'data-hook': 'info-section-description'}).find_all('tr')
    8595
    86     for row in rows:
    87         specifications.append(unicodedata.normalize('NFKD', row.get_text().strip()))
     96        specifications = []
    8897
    89     offer_description = '\n'.join(specifications)
     98        for row in rows:
     99            specifications.append(unicodedata.normalize('NFKD', row.get_text().strip()))
    90100
    91     new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    92                                  color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    93                                  image_url,
    94                                  offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     101        offer_description = '\n'.join(specifications)
    95102
    96 for new_offer in new_offers:
    97     flag = False
    98     flag_price = False
    99     offer_id = None
     103        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     104                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     105                                     image_url,
     106                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     107
     108    for new_offer in new_offers:
     109        flag = False
     110        flag_price = False
     111        offer_id = None
     112
     113        for old_offer in database_offers:
     114
     115            if new_offer.offer_name == old_offer.offer_name:
     116                flag = True
     117                if new_offer.price != old_offer.price:
     118                    flag_price = True
     119                    offer_id = old_offer.offer_id
     120
     121        if flag:
     122            # print('ALREADY IN DATABASE')
     123            # print(new_offer)
     124            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     125            if flag_price:
     126                print('PRICE CHANGED!')  # CHANGE PRICE
     127                print('offer id: ' + str(offer_id))
     128                headers = {'Content-type': 'application/json'}
     129                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     130                             headers=headers)
     131        else:
     132            print('ADDED')  # ADD OFFER
     133            print(new_offer)
     134            headers = {'Content-type': 'application/json'}
     135            requests.post('http://localhost:8080/phoneoffer/addoffer',
     136                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     137
     138    print('------------------------------------')
    100139
    101140    for old_offer in database_offers:
     141        flag = False
     142        for new_offer in new_offers:
     143            if old_offer.offer_name == new_offer.offer_name:
     144                flag = True
    102145
    103         if new_offer.offer_name == old_offer.offer_name:
    104             flag = True
    105             if new_offer.price != old_offer.price:
    106                 flag_price = True
    107                 offer_id = old_offer.offer_id
    108 
    109     if flag:
    110         # print('ALREADY IN DATABASE')
    111         # print(new_offer)
    112         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    113         if flag_price:
    114             print('PRICE CHANGED!')  # CHANGE PRICE
    115             print('offer id: ' + str(offer_id))
    116             headers = {'Content-type': 'application/json'}
    117             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    118                          headers=headers)
    119     else:
    120         print('ADDED')  # ADD OFFER
    121         print(new_offer)
    122         headers = {'Content-type': 'application/json'}
    123         requests.post('http://localhost:8080/phoneoffer/addoffer',
    124                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    125 
    126 print('------------------------------------')
    127 
    128 for old_offer in database_offers:
    129     flag = False
    130     for new_offer in new_offers:
    131         if old_offer.offer_name == new_offer.offer_name:
    132             flag = True
    133 
    134     if not flag:
    135         print('OFFER DELETED')
    136         print(old_offer)
    137         # DELETE OFFER
    138         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     146        if not flag:
     147            print('OFFER DELETED')
     148            print(old_offer)
     149            # DELETE OFFER
     150            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     151except Exception:
     152    traceback.print_exc()
     153    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     154                    ' VALUES (%s, %s, %s);'
     155    insert_value = (offer_shop, last_updated, 'failed')
     156    cur.execute(insert_script, insert_value)
     157    db_connection.commit()
     158    cur.close()
     159    db_connection.close()
     160else:
     161    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     162                    ' VALUES (%s, %s, %s);'
     163    insert_value = (offer_shop, last_updated, 'success')
     164    cur.execute(insert_script, insert_value)
     165    db_connection.commit()
     166    cur.close()
     167    db_connection.close()
    139168
    140169
     170
  • phonelux_scrappers/scrappers/ledikom_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    1920is_validated = False
    2021
    21 # Ledikom phone offers that are already in database
    22 
    23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/ledikom').text))
    24 
    25 database_offers = []
    26 
    27 for offer in offers:
    28     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    29                             offer['ram_memory'],
    30                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    31                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    32                             offer['image_url'],
    33                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    34                             offer['offer_description'],
    35                             offer['offer_shop_code'])
    36     database_offers.append(phoneOffer)
    37 
    38 new_offers = []
    39 
    40 ledikom_phone_urls = [
    41     'https://ledikom.mk/c/416/uredi/apple/iphone?limit=96',
    42     'https://ledikom.mk/c/421/uredi/samsung/telefoni?limit=96',
    43     'https://ledikom.mk/c/424/mobilni-telefoni/xiaomi/telefoni?limit=96',
    44     'https://ledikom.mk/c/430/uredi/huawei/telefoni?limit=96',
    45     'https://ledikom.mk/c/441/uredi/oneplus/telefoni?limit=96',
    46     'https://ledikom.mk/c/413/uredi/google/telefoni?limit=96',
    47     'https://ledikom.mk/c/411/uredi/honor/telefoni?limit=96',
    48     'https://ledikom.mk/c/460/uredi/nokia/telefoni?limit=96',
    49     'https://ledikom.mk/c/461/uredi/asus/telefoni?limit=96',
    50     'https://ledikom.mk/c/488/proizvodi/oppo/telefoni?limit=96'
    51 ]
    52 
    53 for ledikom_url in ledikom_phone_urls:
    54 
    55     # selenium is used because of the dynamic content of the page
    56     driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
    57     driver1.get(ledikom_url)
    58     ledikom_html = driver1.page_source
    59 
    60     # closing the driver so the safari instance can pair with another webdriver session
    61     driver1.close()
    62 
    63     soup1 = BeautifulSoup(ledikom_html, 'html.parser')
    64 
    65     phones = soup1.find('div', {'id': 'content'}) \
    66         .find('div', {'class': 'container'}).find('div', {'class': 'row'}).find('div', {'class': 'item-display'}) \
    67         .find_all('div', {'class': 'item-in-grid'})
    68 
    69     if len(phones) == 0:
    70         continue
    71 
    72     for phone in phones:
    73         offer_url = 'https://ledikom.mk' + phone.find('a').get('href')
    74         image_url = phone.find('a').find('img').get('src')
    75         temp_offer_name = phone.find('div', {'class': 'item-name'}).find('a').get_text().strip()
    76         offer_name = ' '.join(temp_offer_name.split())
    77         brand = offer_name.split(' ')[0]
    78         price = int(phone.find('span', {'class': 'price'}).get_text().replace('ден.', '')
    79                     .replace('ден', '')
    80                     .replace('.', '').strip())
    81 
     22# Call to read the configuration file and connect to database
     23cinfo = config_read.get_databaseconfig("../postgresdb.config")
     24db_connection = psycopg2.connect(
     25    database=cinfo[0],
     26    host=cinfo[1],
     27    user=cinfo[2],
     28    password=cinfo[3]
     29)
     30cur = db_connection.cursor()
     31
     32try:
     33    # Ledikom phone offers that are already in database
     34    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/ledikom').text))
     35
     36    database_offers = []
     37
     38    for offer in offers:
     39        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     40                                offer['ram_memory'],
     41                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     42                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     43                                offer['image_url'],
     44                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     45                                offer['offer_description'],
     46                                offer['offer_shop_code'])
     47        database_offers.append(phoneOffer)
     48
     49    new_offers = []
     50
     51    ledikom_phone_urls = [
     52        'https://ledikom.mk/c/416/uredi/apple/iphone?limit=96',
     53        'https://ledikom.mk/c/421/uredi/samsung/telefoni?limit=96',
     54        'https://ledikom.mk/c/424/mobilni-telefoni/xiaomi/telefoni?limit=96',
     55        'https://ledikom.mk/c/430/uredi/huawei/telefoni?limit=96',
     56        'https://ledikom.mk/c/441/uredi/oneplus/telefoni?limit=96',
     57        'https://ledikom.mk/c/413/uredi/google/telefoni?limit=96',
     58        'https://ledikom.mk/c/411/uredi/honor/telefoni?limit=96',
     59        'https://ledikom.mk/c/460/uredi/nokia/telefoni?limit=96',
     60        'https://ledikom.mk/c/461/uredi/asus/telefoni?limit=96',
     61        'https://ledikom.mk/c/488/proizvodi/oppo/telefoni?limit=96'
     62    ]
     63
     64    for ledikom_url in ledikom_phone_urls:
     65
     66        # selenium is used because of the dynamic content of the page
    8267        driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
    83         driver1.get(offer_url)
    84         # getting offer page html
    85         offer_html = driver1.page_source
     68        driver1.get(ledikom_url)
     69        ledikom_html = driver1.page_source
     70
     71        # closing the driver so the safari instance can pair with another webdriver session
    8672        driver1.close()
    8773
    88         soup2 = BeautifulSoup(offer_html, 'html.parser')
    89 
    90         specifications = soup2.find('div', {'id': 'content'}).find('section', {'class': 'padding-section'}) \
    91             .find_all('div', {'class': 'container'})[1].find('div', {'class': 'col-md-7'}) \
    92             .find_all('div', {'class': 'row'})
    93 
    94         color = None
    95         rom_memory = None
    96         ram_memory = None
    97         back_camera = None
    98         operating_system = None
    99         chipset = None
    100         battery = None
    101         cpu = None
    102         front_camera = None
    103         offer_shop_code = None
    104         offer_description = None
    105 
    106         if len(specifications) != 0:
    107             colors_tags = specifications[0].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a')
    108             temp_colors = []
    109             for color_tag in colors_tags:
    110                 temp_colors.append(color_tag.get_text().strip())
    111             color = ','.join(temp_colors)
    112 
    113         if len(specifications) >= 2:
    114             temp_rom = specifications[1].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a')
    115             rom_list = []
    116             for rom in temp_rom:
    117                 rom_list.append(rom.get('title'))
    118             rom_memory = ','.join(rom_list)
    119 
    120         if len(specifications) >= 3:
    121             temp_ram = specifications[2].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a')
    122             ram_list = []
    123             for ram in temp_ram:
    124                 ram_list.append(ram.get('title'))
    125 
    126             ram_memory = ','.join(ram_list)
    127 
    128         if 'Xiaomi' in brand:
    129             temp = color
    130             color = rom_memory
    131             rom_memory = temp
    132 
    133             temp = ram_memory
    134             ram_memory = color
    135             color = temp
    136 
    137         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    138                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    139                                      image_url,
    140                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    141 
    142 for new_offer in new_offers:
    143     flag = False
    144     flag_price = False
    145     offer_id = None
     74        soup1 = BeautifulSoup(ledikom_html, 'html.parser')
     75
     76        phones = soup1.find('div', {'id': 'content'}) \
     77            .find('div', {'class': 'container'}).find('div', {'class': 'row'}).find('div', {'class': 'item-display'}) \
     78            .find_all('div', {'class': 'item-in-grid'})
     79
     80        if len(phones) == 0:
     81            continue
     82
     83        for phone in phones:
     84            offer_url = 'https://ledikom.mk' + phone.find('a').get('href')
     85            image_url = phone.find('a').find('img').get('src')
     86            temp_offer_name = phone.find('div', {'class': 'item-name'}).find('a').get_text().strip()
     87            offer_name = ' '.join(temp_offer_name.split())
     88            brand = offer_name.split(' ')[0]
     89            price = int(phone.find('span', {'class': 'price'}).get_text().replace('ден.', '')
     90                        .replace('ден', '')
     91                        .replace('.', '').strip())
     92
     93            driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
     94            driver1.get(offer_url)
     95            # getting offer page html
     96            offer_html = driver1.page_source
     97            driver1.close()
     98
     99            soup2 = BeautifulSoup(offer_html, 'html.parser')
     100
     101            specifications = soup2.find('div', {'id': 'content'}).find('section', {'class': 'padding-section'}) \
     102                .find_all('div', {'class': 'container'})[1].find('div', {'class': 'col-md-7'}) \
     103                .find_all('div', {'class': 'row'})
     104
     105            color = None
     106            rom_memory = None
     107            ram_memory = None
     108            back_camera = None
     109            operating_system = None
     110            chipset = None
     111            battery = None
     112            cpu = None
     113            front_camera = None
     114            offer_shop_code = None
     115            offer_description = None
     116
     117            if len(specifications) != 0:
     118                colors_tags = specifications[0].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a')
     119                temp_colors = []
     120                for color_tag in colors_tags:
     121                    temp_colors.append(color_tag.get_text().strip())
     122                color = ','.join(temp_colors)
     123
     124            if len(specifications) >= 2:
     125                temp_rom = specifications[1].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a')
     126                rom_list = []
     127                for rom in temp_rom:
     128                    rom_list.append(rom.get('title'))
     129                rom_memory = ','.join(rom_list)
     130
     131            if len(specifications) >= 3:
     132                temp_ram = specifications[2].find('div', {'class': 'col-md-12 col-xs-12'}).find_all('a')
     133                ram_list = []
     134                for ram in temp_ram:
     135                    ram_list.append(ram.get('title'))
     136
     137                ram_memory = ','.join(ram_list)
     138
     139            if 'Xiaomi' in brand:
     140                temp = color
     141                color = rom_memory
     142                rom_memory = temp
     143
     144                temp = ram_memory
     145                ram_memory = color
     146                color = temp
     147
     148            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     149                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     150                                         image_url,
     151                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     152
     153    for new_offer in new_offers:
     154        flag = False
     155        flag_price = False
     156        offer_id = None
     157
     158        for old_offer in database_offers:
     159
     160            if new_offer.offer_name == old_offer.offer_name:
     161                flag = True
     162                if new_offer.price != old_offer.price:
     163                    flag_price = True
     164                    offer_id = old_offer.offer_id
     165
     166        if flag:
     167            # print('ALREADY IN DATABASE')
     168            # print(new_offer)
     169            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     170            if flag_price:
     171                print('PRICE CHANGED!')  # CHANGE PRICE
     172                print('offer id: ' + str(offer_id))
     173                headers = {'Content-type': 'application/json'}
     174                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     175                             headers=headers)
     176        else:
     177            print('ADDED')  # ADD OFFER
     178            print(new_offer)
     179            headers = {'Content-type': 'application/json'}
     180            requests.post('http://localhost:8080/phoneoffer/addoffer',
     181                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     182
     183    print('------------------------------------')
    146184
    147185    for old_offer in database_offers:
    148 
    149         if new_offer.offer_name == old_offer.offer_name:
    150             flag = True
    151             if new_offer.price != old_offer.price:
    152                 flag_price = True
    153                 offer_id = old_offer.offer_id
    154 
    155     if flag:
    156         # print('ALREADY IN DATABASE')
    157         # print(new_offer)
    158         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    159         if flag_price:
    160             print('PRICE CHANGED!')  # CHANGE PRICE
    161             print('offer id: ' + str(offer_id))
    162             headers = {'Content-type': 'application/json'}
    163             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    164                          headers=headers)
    165     else:
    166         print('ADDED')  # ADD OFFER
    167         print(new_offer)
    168         headers = {'Content-type': 'application/json'}
    169         requests.post('http://localhost:8080/phoneoffer/addoffer',
    170                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    171 
    172 print('------------------------------------')
    173 
    174 for old_offer in database_offers:
    175     flag = False
    176     for new_offer in new_offers:
    177         if old_offer.offer_name == new_offer.offer_name:
    178             flag = True
    179 
    180     if not flag:
    181         print('OFFER DELETED')
    182         print(old_offer)
    183         # DELETE OFFER
    184         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     186        flag = False
     187        for new_offer in new_offers:
     188            if old_offer.offer_name == new_offer.offer_name:
     189                flag = True
     190
     191        if not flag:
     192            print('OFFER DELETED')
     193            print(old_offer)
     194            # DELETE OFFER
     195            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     196except Exception:
     197    traceback.print_exc()
     198    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     199                    ' VALUES (%s, %s, %s);'
     200    insert_value = (offer_shop, last_updated, 'failed')
     201    cur.execute(insert_script, insert_value)
     202    db_connection.commit()
     203    cur.close()
     204    db_connection.close()
     205else:
     206    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     207                    ' VALUES (%s, %s, %s);'
     208    insert_value = (offer_shop, last_updated, 'success')
     209    cur.execute(insert_script, insert_value)
     210    db_connection.commit()
     211    cur.close()
     212    db_connection.close()
     213
  • phonelux_scrappers/scrappers/mobelix_scrapper.py

    rffd50db r47f4eaf  
    33import unicodedata
    44from datetime import datetime
    5 
     5import traceback
    66import psycopg2
    77import config_read
     
    1919is_validated = False
    2020
    21 # Mobelix phone offers that are already in database
    22 
    23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobelix').text))
    24 
    25 database_offers = []
    26 
    27 for offer in offers:
    28     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    29                             offer['ram_memory'],
    30                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    31                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    32                             offer['image_url'],
    33                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    34                             offer['offer_description'],
    35                             offer['offer_shop_code'])
    36     database_offers.append(phoneOffer)
    37 
    38 new_offers = []
    39 
    40 for i in range(1, 17):
    41     mobelix_url = "https://mobelix.com.mk/mk/mobilni-telefoni?page=" + str(i)
    42 
    43     response1 = requests.get(mobelix_url)
    44     soup1 = BeautifulSoup(response1.content, 'html.parser')
    45 
    46     phones = soup1.find_all('div', {'class': 'p-2 rounded text-dark bg-white d-flex w-100'})
    47 
    48     for phone in phones:
    49         offer_url = phone.find('a').get('href')
    50         image_url = phone.find_all('div', {'class': 'col-12'})[0].find('img').get('src')
    51         brand = phone.find_all('div', {'class': 'col-12'})[1].find('h5', {'class': 'mb-0'}).get_text().strip()
    52         offer_name = phone.find_all('div', {'class': 'col-12'})[1] \
    53             .find('h3', {'class': 'h5 font-weight-normal'}).get_text().strip()
    54 
    55         if 'Watch' in offer_name or 'Pad' in offer_name or 'Tab' in offer_name or 'Pods' in offer_name or 'Buds' in offer_name or 'HomePod' in offer_name:
    56             continue
    57 
    58         if brand not in offer_name:
    59             offer_name = brand + " " + offer_name
    60 
    61         temp_prices = phone.find_all('div', {'class': 'col-12'})[1] \
    62             .find('p', {'class': 'h5 price'}).get_text(separator='/').strip()
    63 
    64         if len(temp_prices.split('/')) > 1:
    65             price = int(float(temp_prices.split('/')[1].replace(',', '').replace('ден', '').strip()))
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
     30
     31try:
     32    # Mobelix phone offers that are already in database
     33    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobelix').text))
     34
     35    database_offers = []
     36
     37    for offer in offers:
     38        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     39                                    offer['ram_memory'],
     40                                    offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     41                                    offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     42                                    offer['image_url'],
     43                                    offer['offer_url'], offer['last_updated'], offer['is_validated'],
     44                                    offer['offer_description'],
     45                                    offer['offer_shop_code'])
     46        database_offers.append(phoneOffer)
     47
     48    new_offers = []
     49
     50    for i in range(1, 17):
     51        mobelix_url = "https://mobelix.com.mk/mk/mobilni-telefoni?page=" + str(i)
     52
     53        response1 = requests.get(mobelix_url)
     54        soup1 = BeautifulSoup(response1.content, 'html.parser')
     55
     56        phones = soup1.find_all('div', {'class': 'p-2 rounded text-dark bg-white d-flex w-100'})
     57
     58        for phone in phones:
     59            offer_url = phone.find('a').get('href')
     60            image_url = phone.find_all('div', {'class': 'col-12'})[0].find('img').get('src')
     61            brand = phone.find_all('div', {'class': 'col-12'})[1].find('h5', {'class': 'mb-0'}).get_text().strip()
     62            offer_name = phone.find_all('div', {'class': 'col-12'})[1] \
     63                .find('h3', {'class': 'h5 font-weight-normal'}).get_text().strip()
     64
     65            if 'Watch' in offer_name or 'Pad' in offer_name or 'Tab' in offer_name or 'Pods' in offer_name or 'Buds' in offer_name or 'HomePod' in offer_name:
     66                continue
     67
     68            if brand not in offer_name:
     69                offer_name = brand + " " + offer_name
     70
     71            temp_prices = phone.find_all('div', {'class': 'col-12'})[1] \
     72                .find('p', {'class': 'h5 price'}).get_text(separator='/').strip()
     73
     74            if len(temp_prices.split('/')) > 1:
     75                price = int(float(temp_prices.split('/')[1].replace(',', '').replace('ден', '').strip()))
     76            else:
     77                price = int(float(temp_prices.split('/')[0].replace(',', '').replace('ден', '').strip()))
     78
     79            response2 = requests.get(offer_url)
     80            soup2 = BeautifulSoup(response2.content, 'html.parser')
     81
     82            colors_divs = soup2.find('div', {'class': 'color-wrapper mt-2 mb-1'}) \
     83                .find_all('div', {'class': 'color-box d-inline-block'})  # color div tags
     84
     85            temp_colors = []
     86            for div in colors_divs:
     87                temp_colors.append(div.get('title'))
     88
     89            color = ",".join(temp_colors)  # available colors for offer
     90
     91            tables = soup2.find('div', {'class': 'mobelix-specs table-white bordered-table'}).find_all('table')
     92
     93            operating_system = None
     94            chipset = None
     95            battery = None
     96            ram_memory = None
     97            rom_memory = None
     98            front_camera = ''
     99            back_camera = ''
     100            cpu = None
     101            offer_shop_code = None
     102            offer_description = None
     103
     104            for table in tables:
     105                for cell in table.find_all('td'):
     106                    if cell.get('data-spec') is None:
     107                        continue
     108
     109                    if cell.get('data-spec') == 'os':
     110                        operating_system = unicodedata.normalize('NFKD', cell.get_text().strip())
     111
     112                    if cell.get('data-spec') == 'chipset':
     113                        chipset = unicodedata.normalize('NFKD', cell.get_text().strip())
     114
     115                    if cell.get('data-spec') == 'cpu':
     116                        cpu = unicodedata.normalize('NFKD', cell.get_text().strip())
     117
     118                    if cell.get('data-spec') == 'internalmemory':
     119                        temp_rom = []
     120                        temp_ram = []
     121                        temp_internalmemory = unicodedata.normalize('NFKD', cell.get_text().strip())
     122                        for internalmemory in temp_internalmemory.split(','):
     123                            temp_rom.append(internalmemory.strip().split(' ')[0])
     124                            if len(internalmemory.strip().split(' ')) > 1:
     125                                temp_ram.append(internalmemory.strip().split(' ')[1])
     126                        rom_memory = ','.join(temp_rom)
     127                        ram_memory = ','.join(temp_ram)
     128
     129                    if cell.get('data-spec') == 'cam1modules' or cell.get('data-spec') == 'cam1features' or cell.get(
     130                                'data-spec') == 'cam1video':
     131                        back_camera += unicodedata.normalize('NFKD', cell.get_text().strip()) + '\n'
     132
     133                    if cell.get('data-spec') == 'cam2modules' or cell.get('data-spec') == 'cam2features' or cell.get(
     134                                'data-spec') == 'cam2video':
     135                        front_camera += unicodedata.normalize('NFKD', cell.get_text().strip()) + '\n'
     136
     137                    if cell.get('data-spec') == 'batdescription1':
     138                        battery = unicodedata.normalize('NFKD', cell.get_text().strip())
     139
     140            if front_camera == 'No':
     141                front_camera = None
     142
     143            if back_camera == 'No':
     144                back_camera = None
     145
     146            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     147                                            color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     148                                            image_url,
     149                                            offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     150
     151
     152    for new_offer in new_offers:
     153        flag = False
     154        flag_price = False
     155        offer_id = None
     156
     157        for old_offer in database_offers:
     158
     159            if new_offer.offer_name == old_offer.offer_name:
     160                flag = True
     161                if new_offer.price != old_offer.price:
     162                    flag_price = True
     163                    offer_id = old_offer.offer_id
     164
     165        if flag:
     166            # print('ALREADY IN DATABASE')
     167            # print(new_offer)
     168            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     169            if flag_price:
     170                print('PRICE CHANGED!')  # CHANGE PRICE
     171                print('offer id: ' + str(offer_id))
     172                headers = {'Content-type': 'application/json'}
     173                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     174                        headers=headers)
    66 175        else:
    67             price = int(float(temp_prices.split('/')[0].replace(',', '').replace('ден', '').strip()))
    68 
    69         response2 = requests.get(offer_url)
    70         soup2 = BeautifulSoup(response2.content, 'html.parser')
    71 
    72         colors_divs = soup2.find('div', {'class': 'color-wrapper mt-2 mb-1'}) \
    73             .find_all('div', {'class': 'color-box d-inline-block'})  # color div tags
    74 
    75         temp_colors = []
    76         for div in colors_divs:
    77             temp_colors.append(div.get('title'))
    78 
    79         color = ",".join(temp_colors)  # available colors for offer
    80 
    81         tables = soup2.find('div', {'class': 'mobelix-specs table-white bordered-table'}).find_all('table')
    82 
    83         operating_system = None
    84         chipset = None
    85         battery = None
    86         ram_memory = None
    87         rom_memory = None
    88         front_camera = ''
    89         back_camera = ''
    90         cpu = None
    91         offer_shop_code = None
    92         offer_description = None
    93 
    94         for table in tables:
    95             for cell in table.find_all('td'):
    96                 if cell.get('data-spec') is None:
    97                     continue
    98 
    99                 if cell.get('data-spec') == 'os':
    100                     operating_system = unicodedata.normalize('NFKD', cell.get_text().strip())
    101 
    102                 if cell.get('data-spec') == 'chipset':
    103                     chipset = unicodedata.normalize('NFKD', cell.get_text().strip())
    104 
    105                 if cell.get('data-spec') == 'cpu':
    106                     cpu = unicodedata.normalize('NFKD', cell.get_text().strip())
    107 
    108                 if cell.get('data-spec') == 'internalmemory':
    109                     temp_rom = []
    110                     temp_ram = []
    111                     temp_internalmemory = unicodedata.normalize('NFKD', cell.get_text().strip())
    112                     for internalmemory in temp_internalmemory.split(','):
    113                         temp_rom.append(internalmemory.strip().split(' ')[0])
    114                         if len(internalmemory.strip().split(' ')) > 1:
    115                             temp_ram.append(internalmemory.strip().split(' ')[1])
    116                     rom_memory = ','.join(temp_rom)
    117                     ram_memory = ','.join(temp_ram)
    118 
    119                 if cell.get('data-spec') == 'cam1modules' or cell.get('data-spec') == 'cam1features' or cell.get(
    120                         'data-spec') == 'cam1video':
    121                     back_camera += unicodedata.normalize('NFKD', cell.get_text().strip()) + '\n'
    122 
    123                 if cell.get('data-spec') == 'cam2modules' or cell.get('data-spec') == 'cam2features' or cell.get(
    124                         'data-spec') == 'cam2video':
    125                     front_camera += unicodedata.normalize('NFKD', cell.get_text().strip()) + '\n'
    126 
    127                 if cell.get('data-spec') == 'batdescription1':
    128                     battery = unicodedata.normalize('NFKD', cell.get_text().strip())
    129 
    130         if front_camera == 'No':
    131             front_camera = None
    132 
    133         if back_camera == 'No':
    134             back_camera = None
    135 
    136         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    137                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    138                                      image_url,
    139                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    140 
    141 
    142 for new_offer in new_offers:
    143     flag = False
    144     flag_price = False
    145     offer_id = None
     176            print('ADDED')  # ADD OFFER
     177            print(new_offer)
     178            headers = {'Content-type': 'application/json'}
     179            requests.post('http://localhost:8080/phoneoffer/addoffer',
     180                        headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     181
     182    print('------------------------------------')
    146 183
    147 184    for old_offer in database_offers:
    148 
    149         if new_offer.offer_name == old_offer.offer_name:
    150             flag = True
    151             if new_offer.price != old_offer.price:
    152                 flag_price = True
    153                 offer_id = old_offer.offer_id
    154 
    155     if flag:
    156         # print('ALREADY IN DATABASE')
    157         # print(new_offer)
    158         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    159         if flag_price:
    160             print('PRICE CHANGED!')  # CHANGE PRICE
    161             print('offer id: ' + str(offer_id))
    162             headers = {'Content-type': 'application/json'}
    163             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    164                          headers=headers)
    165     else:
    166         print('ADDED')  # ADD OFFER
    167         print(new_offer)
    168         headers = {'Content-type': 'application/json'}
    169         requests.post('http://localhost:8080/phoneoffer/addoffer',
    170                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    171 
    172 print('------------------------------------')
    173 
    174 for old_offer in database_offers:
    175     flag = False
    176     for new_offer in new_offers:
    177         if old_offer.offer_name == new_offer.offer_name:
    178             flag = True
    179 
    180     if not flag:
    181         print('OFFER DELETED')
    182         print(old_offer)
    183         # DELETE OFFER
    184         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     185        flag = False
     186        for new_offer in new_offers:
     187            if old_offer.offer_name == new_offer.offer_name:
     188                flag = True
     189
     190        if not flag:
     191            print('OFFER DELETED')
     192            print(old_offer)
     193            # DELETE OFFER
     194            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     195except Exception:
     196    traceback.print_exc()
     197    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     198                    ' VALUES (%s, %s, %s);'
     199    insert_value = (offer_shop, last_updated, 'failed')
     200    cur.execute(insert_script, insert_value)
     201    db_connection.commit()
     202    cur.close()
     203    db_connection.close()
     204else:
     205    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     206                    ' VALUES (%s, %s, %s);'
     207    insert_value = (offer_shop, last_updated, 'success')
     208    cur.execute(insert_script, insert_value)
     209    db_connection.commit()
     210    cur.close()
     211    db_connection.close()
     212
  • phonelux_scrappers/scrappers/mobigo_scrapper.py

    rffd50db r47f4eaf  
    1 1import json
     2import traceback
    2 3import unicodedata
    3 4from datetime import datetime
     
    18 19is_validated = False
    19 20
    20 # Mobi Go phone offers that are already in database
    21 
    22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobigo').text))
    23 
    24 database_offers = []
    25 
    26 for offer in offers:
    27     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    28                             offer['ram_memory'],
    29                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    30                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    31                             offer['image_url'],
    32                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    33                             offer['offer_description'],
    34                             offer['offer_shop_code'])
    35     database_offers.append(phoneOffer)
    36 
    37 new_offers = []
    38 
    39 
    40 for i in range(1, 6):
    41     mobigo_url = "https://mobigo.mk/page/" + str(i) + "/"
    42 
    43     response1 = requests.get(mobigo_url)
    44 
    45     soup1 = BeautifulSoup(response1.content, 'html.parser')
    46 
    47     phone_sections = soup1.find_all('ul', {'class': 'recent-posts'})
    48     phones = phone_sections[len(phone_sections) - 1].find_all('li')
    49 
    50     for phone in phones:
    51         offer_url = phone.find('div', {'class', 'post-thumb'}).find('a').get('href')  # offer url
    52         image_url = phone.find('div', {'class', 'post-thumb'}).find('a').find('img').get('src')  # image url
    53         offer_name = phone.find('div', {'class', 'post-content'}).find_all('h2')[0].get_text().strip()  # offer_name
    54 
    55         if "Watch" in offer_name or "Tab" in offer_name:  # if the product is watch or tablet, continue
    56             continue
    57 
    58         price = int(float(phone.find('div', {'class', 'post-content'}).find_all('h2')[1] \
    59                           .get_text().replace('ден.', '').replace('.', '').strip()))  # price
    60 
    61         response2 = requests.get(offer_url)
    62         soup2 = BeautifulSoup(response2.content, 'html.parser')
    63 
    64         brand = soup2.find('a', {'rel': 'category tag'}).get_text().strip()  # brand
    65 
    66         if brand not in offer_name:
    67             offer_name = brand + " " + offer_name
    68 
    69         specifications = soup2.find('table', {'id': 'singlet'}).find_all('tr')
    70 
    71         ram_memory = None
    72         rom_memory = None
    73         battery = None
    74         back_camera = None
    75         front_camera = None
    76         chipset = None
    77         operating_system = None
    78         cpu = None
    79         offer_shop_code = None
    80         offer_description = None
    81         color = None
    82 
    83         for specification in specifications:
    84             if specification.find('td') == None:
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
     30
     31try:
     32    # Mobi Go phone offers that are already in database
     33    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobigo').text))
     34
     35    database_offers = []
     36
     37    for offer in offers:
     38        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     39                                offer['ram_memory'],
     40                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     41                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     42                                offer['image_url'],
     43                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     44                                offer['offer_description'],
     45                                offer['offer_shop_code'])
     46        database_offers.append(phoneOffer)
     47
     48    new_offers = []
     49
     50
     51    for i in range(1, 6):
     52        mobigo_url = "https://mobigo.mk/page/" + str(i) + "/"
     53
     54        response1 = requests.get(mobigo_url)
     55
     56        soup1 = BeautifulSoup(response1.content, 'html.parser')
     57
     58        phone_sections = soup1.find_all('ul', {'class': 'recent-posts'})
     59        phones = phone_sections[len(phone_sections) - 1].find_all('li')
     60
     61        for phone in phones:
     62            offer_url = phone.find('div', {'class', 'post-thumb'}).find('a').get('href')  # offer url
     63            image_url = phone.find('div', {'class', 'post-thumb'}).find('a').find('img').get('src')  # image url
     64            offer_name = phone.find('div', {'class', 'post-content'}).find_all('h2')[0].get_text().strip()  # offer_name
     65
     66            if "Watch" in offer_name or "Tab" in offer_name:  # if the product is watch or tablet, continue
    85 67                continue
    86 68
    87             # operating system
    88             if specification.find('td').get_text() == "Платформа":
    89                 if specification.find('i').get_text() != "/":
    90                     operating_system = specification.find('i').get_text().strip()
    91                 else:
    92                     operating_system = None
    93 
    94             # chipset
    95             if specification.find('td').get_text() == "Chipset":
    96                 if specification.find('i').get_text() != "/":
    97                     chipset = specification.find('i').get_text().strip()
    98                 else:
    99                     chipset = None
    100 
    101             # ram and rom memory
    102             if specification.find('td').get_text() == "Меморија":
    103                 if specification.find('i').get_text() != "/":
    104                     rom_memory = specification.find('i').get_text().replace(',', '').split(' ')[0].strip()
    105                     ram_memory = specification.find('i').get_text().replace(',', '').split(' ')[1].strip()
    106                 else:
    107                     rom_memory = None
    108                     ram_memory = None
    109 
    110             # back camera
    111             if specification.find('td').get_text() == "Главна Камера":
    112                 if specification.find('i').get_text() != "/":
    113                     back_camera = specification.find('i').get_text().strip()
    114                 else:
    115                     back_camera = None
    116 
    117             # front camera
    118             if specification.find('td').get_text() == "Селфи Камера":
    119                 if specification.find('i').get_text() != "/":
    120                     front_camera = specification.find('i').get_text().strip()
    121                 else:
    122                     front_camera = None
    123 
    124             # battery
    125             if specification.find('td').get_text() == "Батерија":
    126                 if specification.find('i').get_text() != "/":
    127                     battery = specification.find('i').get_text().strip()
    128                 else:
    129                     battery = None
    130 
    131         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    132                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    133                                      image_url,
    134                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    135 
    136 
    137 for new_offer in new_offers:
    138     flag = False
    139     flag_price = False
    140     offer_id = None
     69            price = int(float(phone.find('div', {'class', 'post-content'}).find_all('h2')[1] \
     70                              .get_text().replace('ден.', '').replace('.', '').strip()))  # price
     71
     72            response2 = requests.get(offer_url)
     73            soup2 = BeautifulSoup(response2.content, 'html.parser')
     74
     75            brand = soup2.find('a', {'rel': 'category tag'}).get_text().strip()  # brand
     76
     77            if brand not in offer_name:
     78                offer_name = brand + " " + offer_name
     79
     80            specifications = soup2.find('table', {'id': 'singlet'}).find_all('tr')
     81
     82            ram_memory = None
     83            rom_memory = None
     84            battery = None
     85            back_camera = None
     86            front_camera = None
     87            chipset = None
     88            operating_system = None
     89            cpu = None
     90            offer_shop_code = None
     91            offer_description = None
     92            color = None
     93
     94            for specification in specifications:
     95                if specification.find('td') == None:
     96                    continue
     97
     98                # operating system
     99                if specification.find('td').get_text() == "Платформа":
     100                    if specification.find('i').get_text() != "/":
     101                        operating_system = specification.find('i').get_text().strip()
     102                    else:
     103                        operating_system = None
     104
     105                # chipset
     106                if specification.find('td').get_text() == "Chipset":
     107                    if specification.find('i').get_text() != "/":
     108                        chipset = specification.find('i').get_text().strip()
     109                    else:
     110                        chipset = None
     111
     112                # ram and rom memory
     113                if specification.find('td').get_text() == "Меморија":
     114                    if specification.find('i').get_text() != "/":
     115                        rom_memory = specification.find('i').get_text().replace(',', '').split(' ')[0].strip()
     116                        ram_memory = specification.find('i').get_text().replace(',', '').split(' ')[1].strip()
     117                    else:
     118                        rom_memory = None
     119                        ram_memory = None
     120
     121                # back camera
     122                if specification.find('td').get_text() == "Главна Камера":
     123                    if specification.find('i').get_text() != "/":
     124                        back_camera = specification.find('i').get_text().strip()
     125                    else:
     126                        back_camera = None
     127
     128                # front camera
     129                if specification.find('td').get_text() == "Селфи Камера":
     130                    if specification.find('i').get_text() != "/":
     131                        front_camera = specification.find('i').get_text().strip()
     132                    else:
     133                        front_camera = None
     134
     135                # battery
     136                if specification.find('td').get_text() == "Батерија":
     137                    if specification.find('i').get_text() != "/":
     138                        battery = specification.find('i').get_text().strip()
     139                    else:
     140                        battery = None
     141
     142            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     143                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     144                                         image_url,
     145                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     146
     147
     148    for new_offer in new_offers:
     149        flag = False
     150        flag_price = False
     151        offer_id = None
     152
     153        for old_offer in database_offers:
     154
     155            if new_offer.offer_name == old_offer.offer_name:
     156                flag = True
     157                if new_offer.price != old_offer.price:
     158                    flag_price = True
     159                    offer_id = old_offer.offer_id
     160
     161        if flag:
     162            print('ALREADY IN DATABASE')
     163            print(new_offer)
     164            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     165            if flag_price:
     166                print('PRICE CHANGED!')  # CHANGE PRICE
     167                print('offer id: ' + str(offer_id))
     168                headers = {'Content-type': 'application/json'}
     169                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     170                             headers=headers)
     171        else:
     172            print('ADDED')  # ADD OFFER
     173            print(new_offer)
     174            headers = {'Content-type': 'application/json'}
     175            requests.post('http://localhost:8080/phoneoffer/addoffer',
     176                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     177
     178    print('------------------------------------')
    141 179
    142 180    for old_offer in database_offers:
    143 
    144         if new_offer.offer_name == old_offer.offer_name:
    145             flag = True
    146             if new_offer.price != old_offer.price:
    147                 flag_price = True
    148                 offer_id = old_offer.offer_id
    149 
    150     if flag:
    151         print('ALREADY IN DATABASE')
    152         print(new_offer)
    153         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    154         if flag_price:
    155             print('PRICE CHANGED!')  # CHANGE PRICE
    156             print('offer id: ' + str(offer_id))
    157             headers = {'Content-type': 'application/json'}
    158             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    159                          headers=headers)
    160     else:
    161         print('ADDED')  # ADD OFFER
    162         print(new_offer)
    163         headers = {'Content-type': 'application/json'}
    164         requests.post('http://localhost:8080/phoneoffer/addoffer',
    165                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    166 
    167 print('------------------------------------')
    168 
    169 for old_offer in database_offers:
    170     flag = False
    171     for new_offer in new_offers:
    172         if old_offer.offer_name == new_offer.offer_name:
    173             flag = True
    174 
    175     if not flag:
    176         print('OFFER DELETED')
    177         print(old_offer)
    178         # DELETE OFFER
    179         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     181        flag = False
     182        for new_offer in new_offers:
     183            if old_offer.offer_name == new_offer.offer_name:
     184                flag = True
     185
     186        if not flag:
     187            print('OFFER DELETED')
     188            print(old_offer)
     189            # DELETE OFFER
     190            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     191except Exception:
     192    traceback.print_exc()
     193    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     194                    ' VALUES (%s, %s, %s);'
     195    insert_value = (offer_shop, last_updated, 'failed')
     196    cur.execute(insert_script, insert_value)
     197    db_connection.commit()
     198    cur.close()
     199    db_connection.close()
     200else:
     201    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     202                    ' VALUES (%s, %s, %s);'
     203    insert_value = (offer_shop, last_updated, 'success')
     204    cur.execute(insert_script, insert_value)
     205    db_connection.commit()
     206    cur.close()
     207    db_connection.close()
  • phonelux_scrappers/scrappers/mobilezone_scrapper.py

    rffd50db r47f4eaf  
    1 1import json
     2import traceback
    2 3import unicodedata
    3 4from datetime import datetime
     
    18 19is_validated = False
    19 20
    20 # Mobile Zone phone offers that are already in database
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
    21 30
    22 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobilezone').text))
     31try:
     32    # Mobile Zone phone offers that are already in database
     33    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobilezone').text))
    23 34
    24 database_offers = []
     35    database_offers = []
    25 36
    26 for offer in offers:
    27     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    28                             offer['ram_memory'],
    29                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    30                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    31                             offer['image_url'],
    32                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    33                             offer['offer_description'],
    34                             offer['offer_shop_code'])
    35     database_offers.append(phoneOffer)
     37    for offer in offers:
     38        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     39                                offer['ram_memory'],
     40                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     41                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     42                                offer['image_url'],
     43                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     44                                offer['offer_description'],
     45                                offer['offer_shop_code'])
     46        database_offers.append(phoneOffer)
    3647
    37 new_offers = []
     48    new_offers = []
    3849
    39 for i in range(1, 3):
    40     mobilezone_url = 'https://mobilezone.mk/produkt-kategorija/telefoni/novi-telefoni/page/' + str(i) + '/'
     50    for i in range(1, 3):
     51        mobilezone_url = 'https://mobilezone.mk/produkt-kategorija/telefoni/novi-telefoni/page/' + str(i) + '/'
    4152
    42     response1 = requests.get(mobilezone_url)
    43     soup1 = BeautifulSoup(response1.content, 'html.parser')
     53        response1 = requests.get(mobilezone_url)
     54        soup1 = BeautifulSoup(response1.content, 'html.parser')
    4455
    45     phones = soup1.find('ul', {
    46         'class': 'products columns-tablet-2 columns-mobile-2 --skin-proto rey-wcGap-default rey-wcGrid-default '
    47                  '--paginated columns-4'}).find_all('li')
     56        phones = soup1.find('ul', {
     57            'class': 'products columns-tablet-2 columns-mobile-2 --skin-proto rey-wcGap-default rey-wcGrid-default '
     58                     '--paginated columns-4'}).find_all('li')
    4859
    49     for phone in phones:
    50         offer_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}).get(
    51             'href')
    52         image_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}) \
    53             .find('img').get('data-lazy-src')
     60        for phone in phones:
     61            offer_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}).get(
     62                'href')
     63            image_url = phone.find('a', {'class': 'woocommerce-LoopProduct-link woocommerce-loop-product__link'}) \
     64                .find('img').get('data-lazy-src')
    5465
    55         brand_section = phone.find('div', {'class': 'rey-productInner'}).find('div', {'class': 'rey-brandLink'})
     66            brand_section = phone.find('div', {'class': 'rey-productInner'}).find('div', {'class': 'rey-brandLink'})
    5667
    57         if brand_section is not None:
    58             brand = brand_section.find('a').get_text().strip()
     68            if brand_section is not None:
     69                brand = brand_section.find('a').get_text().strip()
     70            else:
     71                brand = None
     72
     73            offer_name = phone.find('h2', {'class': 'woocommerce-loop-product__title'}).find('a').get_text().strip()
     74
     75            if brand is not None and brand not in offer_name:
     76                offer_name = brand + ' ' + offer_name
     77
     78            price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'})
     79            price = None
     80
     81            if price_tag is not None:
     82                price = int(unicodedata.normalize('NFKD', price_tag.find('bdi').get_text()
     83                                              .replace(',', '')
     84                                              .replace('ден', '').strip()))
     85            else:
     86                continue
     87
     88            response2 = requests.get(offer_url)
     89            soup2 = BeautifulSoup(response2.text, 'html.parser')
     90
     91            specifications = soup2.find('table', {'class': 'woocommerce-product-attributes shop_attributes'}).find_all('tr')
     92
     93            back_camera = None
     94            front_camera = None
     95            rom_memory = None
     96            ram_memory = None
     97            operating_system = None
     98            cpu = None
     99            chipset = None
     100            offer_description = None
     101            offer_shop_code = None
     102            battery = None
     103            color = None
     104
     105            for specification in specifications:
     106                if 'Главна камера' in specification.find('th').get_text():
     107                    back_camera = specification.find('td').get_text().strip()
     108
     109                if 'Селфи камера' in specification.find('th').get_text():
     110                    front_camera = specification.find('td').get_text().strip()
     111
     112                if 'Батерија' in specification.find('th').get_text():
     113                    battery = specification.find('td').get_text().strip()
     114
     115                if 'Меморија' in specification.find('th').get_text():
     116                    rom_memory = specification.find('td').get_text().strip()
     117
     118                if 'Боја' in specification.find('th').get_text():
     119                    color = specification.find('td').get_text().strip()
     120
     121            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     122                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     123                                         image_url,
     124                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     125
     126    for new_offer in new_offers:
     127        flag = False
     128        flag_price = False
     129        offer_id = None
     130
     131        for old_offer in database_offers:
     132
     133            if new_offer.offer_name == old_offer.offer_name:
     134                flag = True
     135                if new_offer.price != old_offer.price:
     136                    flag_price = True
     137                    offer_id = old_offer.offer_id
     138
     139        if flag:
     140            # print('ALREADY IN DATABASE')
     141            # print(new_offer)
     142            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     143            if flag_price:
     144                print('PRICE CHANGED!')  # CHANGE PRICE
     145                print('offer id: ' + str(offer_id))
     146                headers = {'Content-type': 'application/json'}
     147                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     148                             headers=headers)
    59149        else:
    60             brand = None
     150            print('ADDED')  # ADD OFFER
     151            print(new_offer)
     152            headers = {'Content-type': 'application/json'}
     153            requests.post('http://localhost:8080/phoneoffer/addoffer',
     154                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    61155
    62         offer_name = phone.find('h2', {'class': 'woocommerce-loop-product__title'}).find('a').get_text().strip()
    63 
    64         if brand is not None and brand not in offer_name:
    65             offer_name = brand + ' ' + offer_name
    66 
    67         price_tag = phone.find('span', {'class': 'woocommerce-Price-amount amount'})
    68         price = None
    69 
    70         if price_tag is not None:
    71             price = int(unicodedata.normalize('NFKD', price_tag.find('bdi').get_text()
    72                                           .replace(',', '')
    73                                           .replace('ден', '').strip()))
    74         else:
    75             continue
    76 
    77         response2 = requests.get(offer_url)
    78         soup2 = BeautifulSoup(response2.text, 'html.parser')
    79 
    80         specifications = soup2.find('table', {'class': 'woocommerce-product-attributes shop_attributes'}).find_all('tr')
    81 
    82         back_camera = None
    83         front_camera = None
    84         rom_memory = None
    85         ram_memory = None
    86         operating_system = None
    87         cpu = None
    88         chipset = None
    89         offer_description = None
    90         offer_shop_code = None
    91         battery = None
    92         color = None
    93 
    94         for specification in specifications:
    95             if 'Главна камера' in specification.find('th').get_text():
    96                 back_camera = specification.find('td').get_text().strip()
    97 
    98             if 'Селфи камера' in specification.find('th').get_text():
    99                 front_camera = specification.find('td').get_text().strip()
    100 
    101             if 'Батерија' in specification.find('th').get_text():
    102                 battery = specification.find('td').get_text().strip()
    103 
    104             if 'Меморија' in specification.find('th').get_text():
    105                 rom_memory = specification.find('td').get_text().strip()
    106 
    107             if 'Боја' in specification.find('th').get_text():
    108                 color = specification.find('td').get_text().strip()
    109 
    110         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    111                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    112                                      image_url,
    113                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    114 
    115 for new_offer in new_offers:
    116     flag = False
    117     flag_price = False
    118     offer_id = None
     156    print('------------------------------------')
    119157
    120158    for old_offer in database_offers:
     159        flag = False
     160        for new_offer in new_offers:
     161            if old_offer.offer_name == new_offer.offer_name:
     162                flag = True
    121163
    122         if new_offer.offer_name == old_offer.offer_name:
    123             flag = True
    124             if new_offer.price != old_offer.price:
    125                 flag_price = True
    126                 offer_id = old_offer.offer_id
    127 
    128     if flag:
    129         # print('ALREADY IN DATABASE')
    130         # print(new_offer)
    131         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    132         if flag_price:
    133             print('PRICE CHANGED!')  # CHANGE PRICE
    134             print('offer id: ' + str(offer_id))
    135             headers = {'Content-type': 'application/json'}
    136             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    137                          headers=headers)
    138     else:
    139         print('ADDED')  # ADD OFFER
    140         print(new_offer)
    141         headers = {'Content-type': 'application/json'}
    142         requests.post('http://localhost:8080/phoneoffer/addoffer',
    143                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    144 
    145 print('------------------------------------')
    146 
    147 for old_offer in database_offers:
    148     flag = False
    149     for new_offer in new_offers:
    150         if old_offer.offer_name == new_offer.offer_name:
    151             flag = True
    152 
    153     if not flag:
    154         print('OFFER DELETED')
    155         print(old_offer)
    156         # DELETE OFFER
    157         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     164        if not flag:
     165            print('OFFER DELETED')
     166            print(old_offer)
     167            # DELETE OFFER
     168            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     169except Exception:
     170    traceback.print_exc()
     171    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     172                    ' VALUES (%s, %s, %s);'
     173    insert_value = (offer_shop, last_updated, 'failed')
     174    cur.execute(insert_script, insert_value)
     175    db_connection.commit()
     176    cur.close()
     177    db_connection.close()
     178else:
     179    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     180                    ' VALUES (%s, %s, %s);'
     181    insert_value = (offer_shop, last_updated, 'success')
     182    cur.execute(insert_script, insert_value)
     183    db_connection.commit()
     184    cur.close()
     185    db_connection.close()
  • phonelux_scrappers/scrappers/mobitech_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    1415sys.stdout = open(file_path, "w")
    1516
    16 
    17 mobitech_url = "https://mobitech.mk/shop/"
    18 
    19 response1 = requests.get(mobitech_url)
    20 
    21 soup1 = BeautifulSoup(response1.content, 'html.parser')
    22 
    23 phones = soup1.find_all('div', {'class': 'jet-woo-products__inner-box'})
    24 
    2517offer_shop = "Mobitech"  # offer shop
    2618last_updated = datetime.now().date()
    2719is_validated = False
    2820
    29 # Mobitech phone offers that are already in database
     21# Call to read the configuration file and connect to database
     22cinfo = config_read.get_databaseconfig("../postgresdb.config")
     23db_connection = psycopg2.connect(
     24    database=cinfo[0],
     25    host=cinfo[1],
     26    user=cinfo[2],
     27    password=cinfo[3]
     28)
     29cur = db_connection.cursor()
    3030
    31 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text))
     31try:
     32    mobitech_url = "https://mobitech.mk/shop/"
    3233
    33 database_offers = []
     34    response1 = requests.get(mobitech_url)
    3435
    35 for offer in offers:
    36     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    37                             offer['ram_memory'],
    38                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    39                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    40                             offer['image_url'],
    41                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    42                             offer['offer_description'],
    43                             offer['offer_shop_code'])
    44     database_offers.append(phoneOffer)
     36    soup1 = BeautifulSoup(response1.content, 'html.parser')
    4537
    46 new_offers = []
     38    phones = soup1.find_all('div', {'class': 'jet-woo-products__inner-box'})
    4739
    48 for phone in phones:
    49     offer_url = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get('href')  # url
    50     image_url = phone.find('div', {'class': 'jet-woo-product-thumbnail'}).find('img').get('src')  # image
    51     brand = phone.find_next('div', {'class': 'jet-woo-product-categories'}).find('a').get_text().strip()  # brand
    52     offer_name = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get_text().strip()  # offer_name
    53     if brand not in offer_name:
    54         offer_name = brand+" "+offer_name
    55     temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi')
    56     price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price
     40    # Mobitech phone offers that are already in database
     41    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/mobitech').text))
    5742
    58     response2 = requests.get(offer_url)
    59     soup2 = BeautifulSoup(response2.content, 'html.parser')
     43    database_offers = []
    6044
    61     specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'})
     45    for offer in offers:
     46        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     47                                offer['ram_memory'],
     48                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     49                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     50                                offer['image_url'],
     51                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     52                                offer['offer_description'],
     53                                offer['offer_shop_code'])
     54        database_offers.append(phoneOffer)
    6255
    63     ram_memory = None
    64     rom_memory = None
    65     battery = None
    66     back_camera = None
    67     front_camera = None
    68     operating_system = None
    69     chipset = None
    70     color = None
    71     offer_shop_code = None
    72     cpu = None
    73     offer_description = None
     56    new_offers = []
    7457
    75     for specification in specifications:
    76         # rom memory
    77         if specification.get_text().startswith("Меморија:"):
    78             rom_memory = specification.get_text().split("Меморија:")[1].strip()
    79             if rom_memory == "Нема" or rom_memory == "/":
    80                 rom_memory = None
     58    for phone in phones:
     59        offer_url = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get('href')  # url
     60        image_url = phone.find('div', {'class': 'jet-woo-product-thumbnail'}).find('img').get('src')  # image
     61        brand = phone.find_next('div', {'class': 'jet-woo-product-categories'}).find('a').get_text().strip()  # brand
     62        offer_name = phone.find('h5', {'class': 'jet-woo-product-title'}).find('a').get_text().strip()  # offer_name
     63        if brand not in offer_name:
     64            offer_name = brand+" "+offer_name
     65        temp_prices = phone.find('div', {'class': 'jet-woo-product-price'}).find_all('bdi')
     66        price = int(float(temp_prices[len(temp_prices) - 1].get_text().replace("ден", "").replace(",", "").strip())) # price
    8167
    82         # ram memory
    83         if specification.get_text().startswith("РАМ Меморија:"):
    84             ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\
    85                 .replace('Ram', '').strip()
    86             if ram_memory == "Нема" or ram_memory == "/":
    87                 ram_memory = None
     68        response2 = requests.get(offer_url)
     69        soup2 = BeautifulSoup(response2.content, 'html.parser')
    8870
    89         # camera
    90         if specification.get_text().startswith("Камера:"):
    91             back_camera = specification.get_text().split("Камера:")[1].strip()
    92             if back_camera == "Нема":
    93                 back_camera = None
     71        specifications = soup2.find_all('h2', {'class': 'elementor-heading-title elementor-size-default'})
    9472
    95         # operating system
    96         if specification.get_text().startswith("Оперативен систем:"):
    97             operating_system = specification.get_text().split("Оперативен систем:")[1].split(",")[0].strip()
    98             if operating_system == "Нема":
    99                 operating_system = None
     73        ram_memory = None
     74        rom_memory = None
     75        battery = None
     76        back_camera = None
     77        front_camera = None
     78        operating_system = None
     79        chipset = None
     80        color = None
     81        offer_shop_code = None
     82        cpu = None
     83        offer_description = None
    10084
    101         # battery
    102         if specification.get_text().startswith("Батерија:"):
    103             battery = specification.get_text().split("Батерија:")[1].strip()
    104             if battery == "Нема":
    105                 battery = None
     85        for specification in specifications:
     86            # rom memory
     87            if specification.get_text().startswith("Меморија:"):
     88                rom_memory = specification.get_text().split("Меморија:")[1].strip()
     89                if rom_memory == "Нема" or rom_memory == "/":
     90                    rom_memory = None
    10691
    107     new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    108                                  color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    109                                  image_url,
    110                                  offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     92            # ram memory
     93            if specification.get_text().startswith("РАМ Меморија:"):
     94                ram_memory = specification.get_text().split("РАМ Меморија:")[1].replace('RAM', '')\
     95                    .replace('Ram', '').strip()
     96                if ram_memory == "Нема" or ram_memory == "/":
     97                    ram_memory = None
    11198
    112 for new_offer in new_offers:
    113     flag = False
    114     flag_price = False
    115     offer_id = None
     99            # camera
     100            if specification.get_text().startswith("Камера:"):
     101                back_camera = specification.get_text().split("Камера:")[1].strip()
     102                if back_camera == "Нема":
     103                    back_camera = None
     104
     105            # operating system
     106            if specification.get_text().startswith("Оперативен систем:"):
     107                operating_system = specification.get_text().split("Оперативен систем:")[1].split(",")[0].strip()
     108                if operating_system == "Нема":
     109                    operating_system = None
     110
     111            # battery
     112            if specification.get_text().startswith("Батерија:"):
     113                battery = specification.get_text().split("Батерија:")[1].strip()
     114                if battery == "Нема":
     115                    battery = None
     116
     117        new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     118                                     color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     119                                     image_url,
     120                                     offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     121
     122    for new_offer in new_offers:
     123        flag = False
     124        flag_price = False
     125        offer_id = None
     126
     127        for old_offer in database_offers:
     128
     129            if new_offer.offer_name == old_offer.offer_name:
     130                flag = True
     131                if new_offer.price != old_offer.price:
     132                    flag_price = True
     133                    offer_id = old_offer.offer_id
     134
     135        if flag:
     136            print('ALREADY IN DATABASE')
     137            print(new_offer)
     138            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     139            if flag_price:
     140                print('PRICE CHANGED!')  # CHANGE PRICE
     141                print('offer id: ' + str(offer_id))
     142                headers = {'Content-type': 'application/json'}
     143                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     144                             headers=headers)
     145        else:
     146            print('ADDED')  # ADD OFFER
     147            print(new_offer)
     148            headers = {'Content-type': 'application/json'}
     149            requests.post('http://localhost:8080/phoneoffer/addoffer',
     150                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     151
     152    print('------------------------------------')
    116153
    117154    for old_offer in database_offers:
     155        flag = False
     156        for new_offer in new_offers:
     157            if old_offer.offer_name == new_offer.offer_name:
     158                flag = True
    118159
    119         if new_offer.offer_name == old_offer.offer_name:
    120             flag = True
    121             if new_offer.price != old_offer.price:
    122                 flag_price = True
    123                 offer_id = old_offer.offer_id
     160        if not flag:
     161            print('OFFER DELETED')
     162            print(old_offer)
     163            # DELETE OFFER
     164            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     165except Exception:
     166    traceback.print_exc()
     167    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     168                    ' VALUES (%s, %s, %s);'
     169    insert_value = (offer_shop, last_updated, 'failed')
     170    cur.execute(insert_script, insert_value)
     171    db_connection.commit()
     172    cur.close()
     173    db_connection.close()
     174else:
     175    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     176                    ' VALUES (%s, %s, %s);'
     177    insert_value = (offer_shop, last_updated, 'success')
     178    cur.execute(insert_script, insert_value)
     179    db_connection.commit()
     180    cur.close()
     181    db_connection.close()
    124182
    125     if flag:
    126         print('ALREADY IN DATABASE')
    127         print(new_offer)
    128         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    129         if flag_price:
    130             print('PRICE CHANGED!')  # CHANGE PRICE
    131             print('offer id: ' + str(offer_id))
    132             headers = {'Content-type': 'application/json'}
    133             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    134                          headers=headers)
    135     else:
    136         print('ADDED')  # ADD OFFER
    137         print(new_offer)
    138         headers = {'Content-type': 'application/json'}
    139         requests.post('http://localhost:8080/phoneoffer/addoffer',
    140                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    141 
    142 print('------------------------------------')
    143 
    144 for old_offer in database_offers:
    145     flag = False
    146     for new_offer in new_offers:
    147         if old_offer.offer_name == new_offer.offer_name:
    148             flag = True
    149 
    150     if not flag:
    151         print('OFFER DELETED')
    152         print(old_offer)
    153         # DELETE OFFER
    154         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
    155 
  • phonelux_scrappers/scrappers/neptun_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    1920is_validated = False
    2021
    21 # Neptun phone offers that are already in database
     22# Call to read the configuration file and connect to database
     23cinfo = config_read.get_databaseconfig("../postgresdb.config")
     24db_connection = psycopg2.connect(
     25    database=cinfo[0],
     26    host=cinfo[1],
     27    user=cinfo[2],
     28    password=cinfo[3]
     29)
     30cur = db_connection.cursor()
    2231
    23 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/neptun').text))
     32try:
     33    # Neptun phone offers that are already in database
     34    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/neptun').text))
    2435
    25 database_offers = []
     36    database_offers = []
    2637
    27 for offer in offers:
    28     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    29                             offer['ram_memory'],
    30                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    31                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    32                             offer['image_url'],
    33                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    34                             offer['offer_description'],
    35                             offer['offer_shop_code'])
    36     database_offers.append(phoneOffer)
     38    for offer in offers:
     39        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     40                                offer['ram_memory'],
     41                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     42                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     43                                offer['image_url'],
     44                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     45                                offer['offer_description'],
     46                                offer['offer_shop_code'])
     47        database_offers.append(phoneOffer)
    3748
    38 new_offers = []
     49    new_offers = []
    3950
    40 for i in range(1, 11):
    41     neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page=' + str(i)
     51    for i in range(1, 11):
     52        neptun_url = 'https://www.neptun.mk/mobilni_telefoni.nspx?page=' + str(i)
    4253
    43     # selenium is used because of the dynamic content of the page
    44     driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
    45     driver1.get(neptun_url)
    46     neptun_html = driver1.page_source
     54        # selenium is used because of the dynamic content of the page
     55        driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
     56        driver1.get(neptun_url)
     57        neptun_html = driver1.page_source
    4758
    48     # closing the driver so the safari instance can pair with another webdriver session
    49     driver1.close()
    50 
    51     # response1 = requests.get(neptun_url)
    52     soup1 = BeautifulSoup(neptun_html, 'html.parser')
    53 
    54     phones = soup1.find('div', {'id': 'mainContainer'}).find('div',
    55                                                              {'class': 'col-lg-9 col-md-9 col-sm-8 col-fix-main'}) \
    56         .find_all('div', {'class': 'ng-scope product-list-item-grid'})
    57 
    58     for phone in phones:
    59         offer_url = 'https://www.neptun.mk' + phone.find('a').get('href')
    60         offer_name = phone.find('a').find('h2').get_text().replace('MOB.TEL.', '').strip()
    61         brand = offer_name.split(' ')[0].strip().capitalize()
    62         image_url = 'https://www.neptun.mk' + phone.find('a').find('div', {'class': 'row'}).find('img').get('src')
    63         price = int(
    64             phone.find('div', {'class': 'col-sm-12 static'}).find('div', {'class': 'product-list-item__prices pt35'})
    65             .find('div', {'class': 'row'}).find('div', {'class': 'newPriceModel'}) \
    66             .find('span', {'class': 'product-price__amount--value ng-binding'}).get_text().replace('.', ''))
    67 
    68         driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
    69         driver1.get(offer_url)
    70         offer_html = driver1.page_source
    7159        # closing the driver so the safari instance can pair with another webdriver session
    7260        driver1.close()
    7361
    74         soup2 = BeautifulSoup(offer_html, 'html.parser')
     62        # response1 = requests.get(neptun_url)
     63        soup1 = BeautifulSoup(neptun_html, 'html.parser')
    7564
    76         offer_shop_code = soup2.find('div', {'ng-if': 'showProductDetails'}) \
    77             .find('div', {'class': 'product-details-first-row'}).find('span', {
    78             'ng-bind': 'model.CodeNumber'}).get_text().strip()
     65        phones = soup1.find('div', {'id': 'mainContainer'}).find('div',
     66                                                                 {'class': 'col-lg-9 col-md-9 col-sm-8 col-fix-main'}) \
     67            .find_all('div', {'class': 'ng-scope product-list-item-grid'})
    7968
    80         specifications_table = \
    81             soup2.find('div', {'id': 'mainContainer'}).find('div', {'ng-if': 'showProductDetails'}).find_all('ul')[-1]
    82         specifications = specifications_table.get_text(separator='\n').strip().split("\n")
     69        for phone in phones:
     70            offer_url = 'https://www.neptun.mk' + phone.find('a').get('href')
     71            offer_name = phone.find('a').find('h2').get_text().replace('MOB.TEL.', '').strip()
     72            brand = offer_name.split(' ')[0].strip().capitalize()
     73            image_url = 'https://www.neptun.mk' + phone.find('a').find('div', {'class': 'row'}).find('img').get('src')
     74            price = int(
     75                phone.find('div', {'class': 'col-sm-12 static'}).find('div', {'class': 'product-list-item__prices pt35'})
     76                .find('div', {'class': 'row'}).find('div', {'class': 'newPriceModel'}) \
     77                .find('span', {'class': 'product-price__amount--value ng-binding'}).get_text().replace('.', ''))
    8378
    84         offer_description = specifications_table.get_text(separator='\n').strip()
     79            driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
     80            driver1.get(offer_url)
     81            offer_html = driver1.page_source
     82            # closing the driver so the safari instance can pair with another webdriver session
     83            driver1.close()
    8584
    86         back_camera = None
    87         operating_system = None
    88         chipset = None
    89         battery = None
    90         ram_memory = None
    91         rom_memory = None
    92         cpu = None
    93         front_camera = None
    94         color = None
     85            soup2 = BeautifulSoup(offer_html, 'html.parser')
    9586
    96         for specification in specifications:
    97             if 'Батерија:' in specification:
    98                 battery = specification.split('Батерија:')[1]
     87            offer_shop_code = soup2.find('div', {'ng-if': 'showProductDetails'}) \
     88                .find('div', {'class': 'product-details-first-row'}).find('span', {
     89                'ng-bind': 'model.CodeNumber'}).get_text().strip()
    9990
    100             if 'CPU:' in specification:
    101                 cpu = specification.split('CPU:')[1]
     91            specifications_table = \
     92                soup2.find('div', {'id': 'mainContainer'}).find('div', {'ng-if': 'showProductDetails'}).find_all('ul')[-1]
     93            specifications = specifications_table.get_text(separator='\n').strip().split("\n")
    10294
    103             if 'Chipset:' in specification:
    104                 chipset = specification.split('Chipset:')[1]
     95            offer_description = specifications_table.get_text(separator='\n').strip()
    10596
    106             if 'RAM Меморија:' in specification:
    107                 ram_memory = specification.split('RAM Меморија:')[1]
    108                 continue
     97            back_camera = None
     98            operating_system = None
     99            chipset = None
     100            battery = None
     101            ram_memory = None
     102            rom_memory = None
     103            cpu = None
     104            front_camera = None
     105            color = None
    109106
    110             if 'ROM Меморија:' in specification:
    111                 rom_memory = specification.split('ROM Меморија:')[1]
    112                 continue
     107            for specification in specifications:
     108                if 'Батерија:' in specification:
     109                    battery = specification.split('Батерија:')[1]
    113110
    114             if 'ROM:' in specification:
    115                 rom_memory = specification.split('ROM:')[1]
     111                if 'CPU:' in specification:
     112                    cpu = specification.split('CPU:')[1]
    116113
    117             if 'RAM:' in specification:
    118                 ram_memory = specification.split('RAM:')[1]
     114                if 'Chipset:' in specification:
     115                    chipset = specification.split('Chipset:')[1]
    119116
    120             if 'iOS' in specification or 'Android' in specification:
    121                 operating_system = specification
     117                if 'RAM Меморија:' in specification:
     118                    ram_memory = specification.split('RAM Меморија:')[1]
     119                    continue
    122120
    123         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    124                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    125                                      image_url,
    126                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     121                if 'ROM Меморија:' in specification:
     122                    rom_memory = specification.split('ROM Меморија:')[1]
     123                    continue
    127124
    128 for new_offer in new_offers:
    129     flag = False
    130     flag_price = False
    131     offer_id = None
     125                if 'ROM:' in specification:
     126                    rom_memory = specification.split('ROM:')[1]
     127
     128                if 'RAM:' in specification:
     129                    ram_memory = specification.split('RAM:')[1]
     130
     131                if 'iOS' in specification or 'Android' in specification:
     132                    operating_system = specification
     133
     134            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     135                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     136                                         image_url,
     137                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     138
     139    for new_offer in new_offers:
     140        flag = False
     141        flag_price = False
     142        offer_id = None
     143
     144        for old_offer in database_offers:
     145
     146            if new_offer.offer_shop_code == old_offer.offer_shop_code:
     147                flag = True
     148                if new_offer.price != old_offer.price:
     149                    flag_price = True
     150                    offer_id = old_offer.offer_id
     151
     152        if flag:
     153            # print('ALREADY IN DATABASE')
     154            # print(new_offer)
     155            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     156            if flag_price:
     157                print('PRICE CHANGED!')  # CHANGE PRICE
     158                print('offer id: ' + str(offer_id))
     159                headers = {'Content-type': 'application/json'}
     160                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     161                             headers=headers)
     162        else:
     163            print('ADDED')  # ADD OFFER
     164            print(new_offer)
     165            headers = {'Content-type': 'application/json'}
     166            requests.post('http://localhost:8080/phoneoffer/addoffer',
     167                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     168
     169    print('------------------------------------')
    132170
    133171    for old_offer in database_offers:
     172        flag = False
     173        for new_offer in new_offers:
     174            if old_offer.offer_shop_code == new_offer.offer_shop_code:
     175                flag = True
    134176
    135         if new_offer.offer_shop_code == old_offer.offer_shop_code:
    136             flag = True
    137             if new_offer.price != old_offer.price:
    138                 flag_price = True
    139                 offer_id = old_offer.offer_id
    140 
    141     if flag:
    142         # print('ALREADY IN DATABASE')
    143         # print(new_offer)
    144         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    145         if flag_price:
    146             print('PRICE CHANGED!')  # CHANGE PRICE
    147             print('offer id: ' + str(offer_id))
    148             headers = {'Content-type': 'application/json'}
    149             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    150                          headers=headers)
    151     else:
    152         print('ADDED')  # ADD OFFER
    153         print(new_offer)
    154         headers = {'Content-type': 'application/json'}
    155         requests.post('http://localhost:8080/phoneoffer/addoffer',
    156                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    157 
    158 print('------------------------------------')
    159 
    160 for old_offer in database_offers:
    161     flag = False
    162     for new_offer in new_offers:
    163         if old_offer.offer_shop_code == new_offer.offer_shop_code:
    164             flag = True
    165 
    166     if not flag:
    167         print('OFFER DELETED')
    168         print(old_offer)
    169         # DELETE OFFER
    170         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     177        if not flag:
     178            print('OFFER DELETED')
     179            print(old_offer)
     180            # DELETE OFFER
     181            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     182except Exception:
     183    traceback.print_exc()
     184    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     185                    ' VALUES (%s, %s, %s);'
     186    insert_value = (offer_shop, last_updated, 'failed')
     187    cur.execute(insert_script, insert_value)
     188    db_connection.commit()
     189    cur.close()
     190    db_connection.close()
     191else:
     192    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     193                    ' VALUES (%s, %s, %s);'
     194    insert_value = (offer_shop, last_updated, 'success')
     195    cur.execute(insert_script, insert_value)
     196    db_connection.commit()
     197    cur.close()
     198    db_connection.close()
  • phonelux_scrappers/scrappers/outputfile.txt

    rffd50db r47f4eaf  
    1 ADDED
    2 {'offer_shop': 'Mobile Zone', 'offer_name': 'Apple iPhone 14 Pro', 'price': 95499, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Златна, Розева, Сива, Црна', 'front_camera': '12MP', 'back_camera': '48 Mp + 12 Mp + 12 Mp', 'chipset': None, 'battery': '3200mAh', 'operating_system': None, 'cpu': None, 'image_url': 'https://i0.wp.com/mobilezone.mk/wp-content/uploads/2022/09/14-pro-silver.png?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/iphone-14-pro/', 'last_updated': datetime.date(2022, 10, 1), 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}
    3 ------------------------------------
    4 OFFER DELETED
    5 {'offer_id': 1179, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung s20 FE', 'price': 24699, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Сина', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i2.wp.com/mobilezone.mk/wp-content/uploads/2022/03/Samsung-Galaxy-S20-FE-blue.png?resize=512%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-s20-fe/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}
    6 OFFER DELETED
    7 {'offer_id': 1181, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung Z Flip3 5G', 'price': 39999, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Црна', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i2.wp.com/mobilezone.mk/wp-content/uploads/2022/03/11.png?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-z-flip3-5g/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}
    8 OFFER DELETED
    9 {'offer_id': 1180, 'offer_shop': 'Mobile Zone', 'offer_name': 'Samsung S21 FE 5G', 'price': 30899, 'ram_memory': None, 'rom_memory': '128GB', 'color': 'Зелена, Црна', 'front_camera': None, 'back_camera': None, 'chipset': None, 'battery': None, 'operating_system': None, 'cpu': None, 'image_url': 'https://i1.wp.com/mobilezone.mk/wp-content/uploads/2022/03/5g.jpg?resize=600%2C600&ssl=1', 'offer_url': 'https://mobilezone.mk/produkti/samsung-s21-fe-5g/', 'last_updated': '2022-07-29T22:00:00.000+00:00', 'is_validated': False, 'offer_description': None, 'offer_shop_code': None}
  • phonelux_scrappers/scrappers/setec_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    1718is_validated = False
    1819
    19 # Setec phone offers that are already in database
     20# Call to read the configuration file and connect to database
     21cinfo = config_read.get_databaseconfig("../postgresdb.config")
     22db_connection = psycopg2.connect(
     23    database=cinfo[0],
     24    host=cinfo[1],
     25    user=cinfo[2],
     26    password=cinfo[3]
     27)
     28cur = db_connection.cursor()
    2029
    21 offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/setec').text))
     30try:
     31    # Setec phone offers that are already in database
     32    offers = json.loads(unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/setec').text))
    2233
    23 database_offers = []
     34    database_offers = []
    2435
    25 for offer in offers:
    26     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    27                             offer['ram_memory'],
    28                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    29                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    30                             offer['image_url'],
    31                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    32                             offer['offer_description'],
    33                             offer['offer_shop_code'])
    34     database_offers.append(phoneOffer)
     36    for offer in offers:
     37        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     38                                offer['ram_memory'],
     39                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     40                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     41                                offer['image_url'],
     42                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     43                                offer['offer_description'],
     44                                offer['offer_shop_code'])
     45        database_offers.append(phoneOffer)
    3546
    36 new_offers = []
     47    new_offers = []
    3748
    38 for i in range(1, 9):
    39     setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' + str(i)
     49    for i in range(1, 9):
     50        setec_url = 'https://setec.mk/index.php?route=product/category&path=10066_10067&page=' + str(i)
    4051
    41     response1 = requests.get(setec_url)
    42     soup1 = BeautifulSoup(response1.content, 'html.parser')
     52        response1 = requests.get(setec_url)
     53        soup1 = BeautifulSoup(response1.content, 'html.parser')
    4354
    44     phones = soup1.find('div', {'id': 'mfilter-content-container'}) \
    45         .find_all('div', {'class': 'col-sm-4 col-xs-6'})
     55        phones = soup1.find('div', {'id': 'mfilter-content-container'}) \
     56            .find_all('div', {'class': 'col-sm-4 col-xs-6'})
    4657
    47     for phone in phones:
    48         offer_url = phone.find('div', {'class': 'left'}).find('a').get('href')
    49         image_url = phone.find('div', {'class': 'left'}).find('a').find('img').get('src')
    50         offer_name = phone.find('div', {'class': 'right'}).find('div', {'class': 'name'}).find('a').get_text().strip()
    51         brand = offer_name.split(' ')[0]
     58        for phone in phones:
     59            offer_url = phone.find('div', {'class': 'left'}).find('a').get('href')
     60            image_url = phone.find('div', {'class': 'left'}).find('a').find('img').get('src')
     61            offer_name = phone.find('div', {'class': 'right'}).find('div', {'class': 'name'}).find('a').get_text().strip()
     62            brand = offer_name.split(' ')[0]
    5263
    53         back_camera = None
    54         operating_system = None
    55         chipset = None
    56         battery = None
    57         ram_memory = None
    58         rom_memory = None
    59         cpu = None
    60         front_camera = None
    61         color = None
     64            back_camera = None
     65            operating_system = None
     66            chipset = None
     67            battery = None
     68            ram_memory = None
     69            rom_memory = None
     70            cpu = None
     71            front_camera = None
     72            color = None
    6273
    63         if 'Cable' in offer_name or 'AirTag' in offer_name:
    64             continue
     74            if 'Cable' in offer_name or 'AirTag' in offer_name:
     75                continue
    6576
    66         if brand not in offer_name:
    67             offer_name = brand + " " + offer_name
     77            if brand not in offer_name:
     78                offer_name = brand + " " + offer_name
    6879
    69         offer_shop_code = phone.find('div', {'class': 'right'}) \
    70             .find('div', {'class': 'shifra'}).get_text().replace('Шифра:', '').strip()
     80            offer_shop_code = phone.find('div', {'class': 'right'}) \
     81                .find('div', {'class': 'shifra'}).get_text().replace('Шифра:', '').strip()
    7182
    72         price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
    73             find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'})
     83            price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
     84                find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'price-old-new'})
    7485
    75         if price_tag is None:
    76             price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
    77                 find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'cena_za_kesh'})
     86            if price_tag is None:
     87                price_tag = phone.find('div', {'class': 'right'}).find('div', {'class': 'price'}). \
     88                    find('div', {'class': 'category-price-redovna'}).find('span', {'class': 'cena_za_kesh'})
    7889
    79         price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip())
     90            price = int(price_tag.get_text().replace('Ден.', '').replace(',', '').strip())
    8091
    81         response2 = requests.get(offer_url)
    82         soup2 = BeautifulSoup(response2.content, 'html.parser')
     92            response2 = requests.get(offer_url)
     93            soup2 = BeautifulSoup(response2.content, 'html.parser')
    8394
    84         offer_description = soup2.find('div', {'id': 'tab-description'}).get_text(separator='\n')
     95            offer_description = soup2.find('div', {'id': 'tab-description'}).get_text(separator='\n')
    8596
    86         new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
    87                                      color, front_camera, back_camera, chipset, battery, operating_system, cpu,
    88                                      image_url,
    89                                      offer_url, last_updated, is_validated, offer_description, offer_shop_code))
     97            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     98                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     99                                         image_url,
     100                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    90101
    91 for new_offer in new_offers:
    92     flag = False
    93     flag_price = False
    94     offer_id = None
     102    for new_offer in new_offers:
     103        flag = False
     104        flag_price = False
     105        offer_id = None
     106
     107        for old_offer in database_offers:
     108
     109            if new_offer.offer_shop_code == old_offer.offer_shop_code:
     110                flag = True
     111                if new_offer.price != old_offer.price:
     112                    flag_price = True
     113                    offer_id = old_offer.offer_id
     114
     115        if flag:
     116            # print('ALREADY IN DATABASE')
     117            # print(new_offer)
     118            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     119            if flag_price:
     120                print('PRICE CHANGED!')  # CHANGE PRICE
     121                print('offer id: ' + str(offer_id))
     122                headers = {'Content-type': 'application/json'}
     123                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     124                             headers=headers)
     125        else:
     126            print('ADDED')  # ADD OFFER
     127            print(new_offer)
     128            headers = {'Content-type': 'application/json'}
     129            requests.post('http://localhost:8080/phoneoffer/addoffer',
     130                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     131
     132    print('------------------------------------')
    95133
    96134    for old_offer in database_offers:
     135        flag = False
     136        for new_offer in new_offers:
     137            if old_offer.offer_shop_code == new_offer.offer_shop_code:
     138                flag = True
    97139
    98         if new_offer.offer_shop_code == old_offer.offer_shop_code:
    99             flag = True
    100             if new_offer.price != old_offer.price:
    101                 flag_price = True
    102                 offer_id = old_offer.offer_id
    103 
    104     if flag:
    105         # print('ALREADY IN DATABASE')
    106         # print(new_offer)
    107         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    108         if flag_price:
    109             print('PRICE CHANGED!')  # CHANGE PRICE
    110             print('offer id: ' + str(offer_id))
    111             headers = {'Content-type': 'application/json'}
    112             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    113                          headers=headers)
    114     else:
    115         print('ADDED')  # ADD OFFER
    116         print(new_offer)
    117         headers = {'Content-type': 'application/json'}
    118         requests.post('http://localhost:8080/phoneoffer/addoffer',
    119                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    120 
    121 print('------------------------------------')
    122 
    123 for old_offer in database_offers:
    124     flag = False
    125     for new_offer in new_offers:
    126         if old_offer.offer_shop_code == new_offer.offer_shop_code:
    127             flag = True
    128 
    129     if not flag:
    130         print('OFFER DELETED')
    131         print(old_offer)
    132         # DELETE OFFER
    133         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     140        if not flag:
     141            print('OFFER DELETED')
     142            print(old_offer)
     143            # DELETE OFFER
     144            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     145except Exception:
     146    traceback.print_exc()
     147    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     148                    ' VALUES (%s, %s, %s);'
     149    insert_value = (offer_shop, last_updated, 'failed')
     150    cur.execute(insert_script, insert_value)
     151    db_connection.commit()
     152    cur.close()
     153    db_connection.close()
     154else:
     155    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     156                    ' VALUES (%s, %s, %s);'
     157    insert_value = (offer_shop, last_updated, 'success')
     158    cur.execute(insert_script, insert_value)
     159    db_connection.commit()
     160    cur.close()
     161    db_connection.close()
  • phonelux_scrappers/scrappers/tehnomarket_scrapper.py

    rffd50db r47f4eaf  
    11import json
     2import traceback
    23import unicodedata
    34from datetime import datetime
     
    7677
    7778
    78 # Tehnomarket phone offers that are already in database
     79# Call to read the configuration file and connect to database
     80cinfo = config_read.get_databaseconfig("../postgresdb.config")
     81db_connection = psycopg2.connect(
     82    database=cinfo[0],
     83    host=cinfo[1],
     84    user=cinfo[2],
     85    password=cinfo[3]
     86)
     87cur = db_connection.cursor()
    7988
    80 offers = json.loads(
    81     unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/tehnomarket').text))
     89try:
     90    # Tehnomarket phone offers that are already in database
     91    offers = json.loads(
     92        unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/tehnomarket').text))
    8293
    83 database_offers = []
     94    database_offers = []
    8495
    85 for offer in offers:
    86     phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
    87                             offer['ram_memory'],
    88                             offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
    89                             offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
    90                             offer['image_url'],
    91                             offer['offer_url'], offer['last_updated'], offer['is_validated'],
    92                             offer['offer_description'],
    93                             offer['offer_shop_code'])
    94     database_offers.append(phoneOffer)
     96    for offer in offers:
     97        phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     98                                offer['ram_memory'],
     99                                offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     100                                offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     101                                offer['image_url'],
     102                                offer['offer_url'], offer['last_updated'], offer['is_validated'],
     103                                offer['offer_description'],
     104                                offer['offer_shop_code'])
     105        database_offers.append(phoneOffer)
    95106
    96 new_offers = []
     107    new_offers = []
    97108
    98 for i in range(1, 6):
    99     tehnomarket_url = 'https://tehnomarket.com.mk/category/4109/mobilni-telefoni#page/' + str(i)
    100     # print(anhoch_url)
     109    for i in range(1, 6):
     110        tehnomarket_url = 'https://tehnomarket.com.mk/category/4109/mobilni-telefoni#page/' + str(i)
     111        # print(anhoch_url)
    101112
    102     # selenium is used because of the dynamic content of the page
    103     driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
    104     driver1.get(tehnomarket_url)
     113        # selenium is used because of the dynamic content of the page
     114        driver1 = webdriver.Safari(executable_path='/usr/bin/safaridriver')
     115        driver1.get(tehnomarket_url)
    105116
    106     scrape_function(driver1, i, new_offers)
     117        scrape_function(driver1, i, new_offers)
    107118
    108     # closing the driver so the safari instance can pair with another webdriver session
    109     driver1.close()
     119        # closing the driver so the safari instance can pair with another webdriver session
     120        driver1.close()
    110121
    111 for new_offer in new_offers:
    112     flag = False
    113     flag_price = False
    114     offer_id = None
     122    for new_offer in new_offers:
     123        flag = False
     124        flag_price = False
     125        offer_id = None
     126
     127        for old_offer in database_offers:
     128
     129            if new_offer.offer_shop_code == old_offer.offer_shop_code:
     130                flag = True
     131                if new_offer.price != old_offer.price:
     132                    flag_price = True
     133                    offer_id = old_offer.offer_id
     134
     135        if flag:
     136            # print('ALREADY IN DATABASE')
     137            # print(new_offer)
     138            # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     139            if flag_price:
     140                print('PRICE CHANGED!')  # CHANGE PRICE
     141                print('offer id: ' + str(offer_id))
     142                headers = {'Content-type': 'application/json'}
     143                requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     144                             headers=headers)
     145        else:
     146            print('ADDED')  # ADD OFFER
     147            print(new_offer)
     148            headers = {'Content-type': 'application/json'}
     149            requests.post('http://localhost:8080/phoneoffer/addoffer',
     150                          headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     151
     152    print('------------------------------------')
    115153
    116154    for old_offer in database_offers:
     155        flag = False
     156        for new_offer in new_offers:
     157            if old_offer.offer_shop_code == new_offer.offer_shop_code:
     158                flag = True
    117159
    118         if new_offer.offer_shop_code == old_offer.offer_shop_code:
    119             flag = True
    120             if new_offer.price != old_offer.price:
    121                 flag_price = True
    122                 offer_id = old_offer.offer_id
    123 
    124     if flag:
    125         # print('ALREADY IN DATABASE')
    126         # print(new_offer)
    127         # if it's already in database, check PRICE and if it's changed, change it !!!!!!
    128         if flag_price:
    129             print('PRICE CHANGED!')  # CHANGE PRICE
    130             print('offer id: ' + str(offer_id))
    131             headers = {'Content-type': 'application/json'}
    132             requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
    133                          headers=headers)
    134     else:
    135         print('ADDED')  # ADD OFFER
    136         print(new_offer)
    137         headers = {'Content-type': 'application/json'}
    138         requests.post('http://localhost:8080/phoneoffer/addoffer',
    139                       headers=headers, data=json.dumps(new_offer.__dict__, default=str))
    140 
    141 print('------------------------------------')
    142 
    143 for old_offer in database_offers:
    144     flag = False
    145     for new_offer in new_offers:
    146         if old_offer.offer_shop_code == new_offer.offer_shop_code:
    147             flag = True
    148 
    149     if not flag:
    150         print('OFFER DELETED')
    151         print(old_offer)
    152         # DELETE OFFER
    153         requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     160        if not flag:
     161            print('OFFER DELETED')
     162            print(old_offer)
     163            # DELETE OFFER
     164            requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
     165except Exception:
     166    traceback.print_exc()
     167    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     168                    ' VALUES (%s, %s, %s);'
     169    insert_value = ('Tehnomarket', datetime.now().date(), 'failed')
     170    cur.execute(insert_script, insert_value)
     171    db_connection.commit()
     172    cur.close()
     173    db_connection.close()
     174else:
     175    insert_script = 'INSERT INTO scrapper_info (store, recieved_at, status)' \
     176                    ' VALUES (%s, %s, %s);'
     177    insert_value = ('Tehnomarket', datetime.now().date(), 'success')
     178    cur.execute(insert_script, insert_value)
     179    db_connection.commit()
     180    cur.close()
     181    db_connection.close()
Note: See TracChangeset for help on using the changeset viewer.