Ignore:
Timestamp:
10/01/22 22:55:27 (21 months ago)
Author:
Marko <Marko@…>
Branches:
master
Children:
fd5b100
Parents:
48f3030
Message:

Refactored code

File:
1 edited

Legend:

Unmodified
Added
Removed
  • phonelux_scrappers/scrappers/tehnomarket_scrapper.py

    r48f3030 r895cd87  
     1import json
    12import unicodedata
    23from datetime import datetime
     
    67from selenium import webdriver
    78import requests
     9import sys
    810
    9 import sys
     11from classes.phoneoffer import PhoneOffer
    1012
    1113file_path = 'outputfile.txt'
    1214sys.stdout = open(file_path, "w")
    1315
    14 # Call to read the configuration file and connect to database
    15 cinfo = config_read.get_databaseconfig("../postgresdb.config")
    16 db_connection = psycopg2.connect(
    17     database=cinfo[0],
    18     host=cinfo[1],
    19     user=cinfo[2],
    20     password=cinfo[3]
    21 )
    22 cur = db_connection.cursor()
    2316
    24 
    25 def scrape_function(driver1, i):
     17def scrape_function(driver1, i, new_offers):
    2618    offer_shop = "Tehnomarket"  # offer shop
    2719    last_updated = datetime.now().date()
     
    5951            offer_shop_code = details[4].strip()
    6052
     53            back_camera = None
     54            operating_system = None
     55            chipset = None
     56            battery = None
     57            ram_memory = None
     58            rom_memory = None
     59            cpu = None
     60            front_camera = None
     61            color = None
     62
    6163            specifications = []
    6264            for info in soup2.find_all('span', {'class': 'info'}):
    6365                specifications.append(info.get_text())
    6466
    65             print(brand)
    66             print(offer_name)
    67             print()
    68             print()
    69 
    7067            offer_description = '\n'.join(specifications)
    7168
    72             insert_script = 'INSERT INTO phone_offers (offer_shop, brand, offer_name, price, image_url, offer_url,' \
    73                             'offer_description, offer_shop_code, last_updated, is_validated)' \
    74                             ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
    75             insert_value = (offer_shop, brand, offer_name, price, image_url, offer_url, offer_description,
    76                             offer_shop_code, last_updated, is_validated)
    77             cur.execute(insert_script, insert_value)
    78             db_connection.commit()
     69            new_offers.append(PhoneOffer(offer_shop, offer_name, price, ram_memory, rom_memory,
     70                                         color, front_camera, back_camera, chipset, battery, operating_system, cpu,
     71                                         image_url,
     72                                         offer_url, last_updated, is_validated, offer_description, offer_shop_code))
    7973    else:
    8074        driver1.implicitly_wait(30)
    81         scrape_function(driver1, i)
     75        scrape_function(driver1, i, new_offers)
    8276
     77
     78# Tehnomarket phone offers that are already in database
     79
     80offers = json.loads(
     81    unicodedata.normalize('NFKD', requests.get('http://localhost:8080/phoneoffer/shop/tehnomarket').text))
     82
     83database_offers = []
     84
     85for offer in offers:
     86    phoneOffer = PhoneOffer(offer['id'], offer['offer_shop'], offer['offer_name'], offer['price'],
     87                            offer['ram_memory'],
     88                            offer['rom_memory'], offer['color'], offer['front_camera'], offer['back_camera'],
     89                            offer['chipset'], offer['battery'], offer['operating_system'], offer['cpu'],
     90                            offer['image_url'],
     91                            offer['offer_url'], offer['last_updated'], offer['is_validated'],
     92                            offer['offer_description'],
     93                            offer['offer_shop_code'])
     94    database_offers.append(phoneOffer)
     95
     96new_offers = []
    8397
    8498for i in range(1, 6):
     
    90104    driver1.get(tehnomarket_url)
    91105
    92     scrape_function(driver1, i)
     106    scrape_function(driver1, i, new_offers)
     107
    93108    # closing the driver so the safari instance can pair with another webdriver session
    94109    driver1.close()
    95110
    96 cur.close()
    97 db_connection.close()
     111for new_offer in new_offers:
     112    flag = False
     113    flag_price = False
     114    offer_id = None
     115
     116    for old_offer in database_offers:
     117
     118        if new_offer.offer_shop_code == old_offer.offer_shop_code:
     119            flag = True
     120            if new_offer.price != old_offer.price:
     121                flag_price = True
     122                offer_id = old_offer.offer_id
     123
     124    if flag:
     125        # print('ALREADY IN DATABASE')
     126        # print(new_offer)
     127        # if it's already in database, check PRICE and if it's changed, change it !!!!!!
     128        if flag_price:
     129            print('PRICE CHANGED!')  # CHANGE PRICE
     130            print('offer id: ' + str(offer_id))
     131            headers = {'Content-type': 'application/json'}
     132            requests.put('http://localhost:8080/phoneoffer/' + str(offer_id) + '/changeprice/' + str(new_offer.price),
     133                         headers=headers)
     134    else:
     135        print('ADDED')  # ADD OFFER
     136        print(new_offer)
     137        headers = {'Content-type': 'application/json'}
     138        requests.post('http://localhost:8080/phoneoffer/addoffer',
     139                      headers=headers, data=json.dumps(new_offer.__dict__, default=str))
     140
     141print('------------------------------------')
     142
     143for old_offer in database_offers:
     144    flag = False
     145    for new_offer in new_offers:
     146        if old_offer.offer_shop_code == new_offer.offer_shop_code:
     147            flag = True
     148
     149    if not flag:
     150        print('OFFER DELETED')
     151        print(old_offer)
     152        # DELETE OFFER
     153        requests.delete('http://localhost:8080/phoneoffer/deleteoffer/' + str(old_offer.offer_id))
Note: See TracChangeset for help on using the changeset viewer.