React & Flask # 1 – user registration

Frontend:

import React, {useState} from 'react'
import { useHistory } from "react-router-dom";
import {useForm} from "react-hook-form";
import './RegisterForm.css'
function RegisterForm() {

    let history = useHistory();

    const {register, handleSubmit, getValues, formState: { errors } } = useForm();

    const [userRole, setUserRole] = useState('')

    const onSubmit = (data, e) => {
        e.preventDefault()
        data['role'] = userRole
        fetch('/api/user/register', {
            method: 'POST',
            headers: {
                'Content-type': 'application/json',
            },
            body: JSON.stringify(data)
        })
        .then(res => console.log(res.json()))
        history.push('/user/login')
    }
    return (
        <div className="columns">
            <div className="column is-narrow-desktop is-offset-5">
                <div className="card is-3 mt-5">
                    <header className="card-header">
                        <p className="card-header-title has-text-primary-light has-background-dark">
                            User Registration
                        </p>
                    </header>
                    <div className="card-content">
                        <div className="content">
                            <form onSubmit={handleSubmit(onSubmit)}>
                                <div className="field">
                                    <div className="control">
                                        <select name="role" required
                                            value={userRole}
                                            onChange={(e) => setUserRole(e.target.value)}>
                                            <option value="" disabled defaultValue="" hidden>Role</option>
                                            <option value="Contractor" >Contractor</option>
                                            <option value="Customer">Customer</option>
                                        </select>
                                    </div>
                                </div>
                                <div className="field control">
                                    <input placeholder="Username" name="username" 
                                        {...register("username", {required: true, minLength: 3})} />
                                    <p className="help is-danger">
                                        {errors.username?.type === 'required' && "This field is required"}
                                        {errors.username?.type === 'minLength' && "This field must contain at least 3 characters"}
                                    </p>
                                </div>
                                <div className="field control">
                                    <input placeholder="Email" id="email"
                                        autoComplete="email"
                                        {...register("email", {required: true, pattern: /\S+@\S+\.\S+/})} />
                                    <p className="help is-danger">
                                        {errors.email?.type === 'required' && "This field is required"}
                                        {errors.email?.type === 'pattern' && "Invalid email address"}
                                    </p>
                                </div>
                                <div className="field control">
                                    <input type="password" placeholder="Password" id="password" name="password"
                                        autoComplete="new-password"
                                        {...register("password",  {required: true, minLength: 6})} />
                                    <p className="help is-danger">
                                        {errors.password?.type === 'required' && "This field is required"}
                                        {errors.password?.type === 'minLength' && "This field must contain at least 6 characters"}
                                    </p>
                                </div>
                                <div className="field control">
                                    <input type="password" placeholder="Retype Password" id="confirm_password"
                                        autoComplete="new-password" 
                                        {...register("confirm_password",  {required: true, minLength: 6,
                                            validate: value => value === getValues('password')})} />
                                    <p className="help is-danger">
                                        {errors.confirm_password?.type === 'required' && "This field is required"}
                                        {errors.confirm_password?.type === 'minLength' && "This field must contain at least 6 characters"}
                                        {errors.confirm_password?.type === 'validate' && "Passwords don't match"}
                                    </p>
                                </div>

                                <div className="control">
                                    <input type="submit" value="Register" className="button is-link" />    
                                </div>
                            </form>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        

    )
}

export default RegisterForm

Backend – main file: __init__py

def create_app(Config):
    app =  Flask(__name__, static_folder='../build', static_url_path='/')
    app.config.from_object(Config)
    api.init_app(app)
    db.init_app(app)
    app.db = db
    bcrypt = Bcrypt(app)
    app.bcrypt = bcrypt


    from .common import utils
    from .common import routes
    app.register_blueprint(api_bp)
    app.register_blueprint(utils.bp)
    app.register_blueprint(routes.bp)
    return app


from api.resources.auth import UserRegister
api.add_resource(UserRegister, '/api/user/register', endpoint='user_register')

File that contains the Resource class for handling user registration:

from flask import current_app, request
from flask_restful import Resource
from api.common.parsers import parser
from ..common.utils import send_email
from ..common.models import User
from .. import db
class UserRegister(Resource):

    def post(self):
        req = request.json
        username = req['username']
        password = req['password']
        email = req['email']
        role = req['role']
        bcrypt = current_app.bcrypt
        hashed_password = bcrypt.generate_password_hash(password).decode('utf-8')
        status = send_email(email, category='confirm_account')
        user = User(username=username, password=hashed_password, email=email, role=role)
        db.session.add(user)
        db.session.commit()
        return {"message": status}

Forms in React using React Hook Form

My sample login form:

import React from 'react'
import {useForm} from "react-hook-form"


function LoginForm() {

    const {register, handleSubmit,  formState: { errors } } = useForm();
    

    const onSubmit = (data, e) =>  {
        e.preventDefault();
        console.log(data)
        };


    return (
        <form onSubmit={handleSubmit(onSubmit)}>

            <input placeholder="Enter username" id="username" 
                {...register("username", {required: true, minLength: 3})} />
            {errors.username?.type === 'required' && "This field is required"}
            {errors.username?.type === 'minLength' && "This field must contain at least 3 characters."}
            {/* {errors.username && <span>This field is required and must contain at least 3 characters.</span>} */}
            
            <input type="password" placeholder="Password" id="password" 
                {...register("password",  {required: true, minLength: 6})} />
            {errors.password?.type === 'required' && "This field is required"}
            {errors.password?.type === 'minLength' && "This field must contain at least 6 characters."}


            <input type="submit" value="Login" />    

        </form>
    )
}

export default LoginForm

 

React app router snippet

Main app component sample snippet with BrowserRouter:

import './App.css';
import {
  BrowserRouter,
  Switch,
  Route,
  Link
} from 'react-router-dom' 
import LoginForm from './components/LoginForm/LoginForm'

function App() {
  return (
    <BrowserRouter>
      <div className="App">
        <Switch>
          <Route path="/" exact>
            <LoginForm />
            <Link to="/about">About</Link>
          </Route>
          <Route path="/about">
            <Link to="/">Go to main page</Link>
          </Route>
          <Route path="/to-component" component={LoginForm} />
        </Switch>
      </div>
    </BrowserRouter>
  );
}

export default App;

Order processing for Logistics Centers

The application allows the customer to place new orders and accept orders by the contractor as well as generate reservations, i.e. manage delivery time slots to central warehouses and logistics centers.

The source code for the application is available here.

In this post, I will introduce the new_booking(id) function which handles the booking for a specific order (contract). It takes the contract identification number as a parameter, i.e.

@bp.route('/booking/<int:id>', methods=['GET', 'POST'])
@login_required
def new_booking(id):

Then, in the new_booking () function, I create a booking form and get the booking object for a given contract, i.e.

    form = BookingForm()
    result = Booking.query.filter_by(contract_id=id).first()

Next, I set the contract object for the booking, i.e.

    current_contract = Contract.query.get(id)     # Returns contract for current booking

In the following lines I get a list of orders that were generated for a specific warehouse on the day of delivery, i.e.

# Filtering by one column gives a list of tuple(s) so I converted it to a list of values
    contracts = [ids[0] for ids in Contract.query.with_entities(Contract.id).filter_by(
        date_of_delivery=current_contract.date_of_delivery).filter_by(
        warehouse=current_contract.warehouse).all()]

Then I validate if the form has been submitted. Depending on whether the reservation for a given contract has already been created, I update the data in the database based on the data from the form. If the booking object does not exist, I create a new booking object and save it to the database, i.e.

    if form.validate_on_submit():
        if result:
            result.booking_time = form.booking_time.data
            result.driver_full_name = form.driver_full_name.data
            result.driver_phone_number = form.driver_phone_number.data
            result.truck_reg_number = form.truck_reg_number.data
            db.session.commit()
        else:
            booking = Booking(booking_time=form.booking_time.data, 
                            contract_id = id,
                            driver_full_name=form.driver_full_name.data,
                            driver_phone_number=form.driver_phone_number.data,
                            truck_reg_number=form.truck_reg_number.data)
            db.session.add(booking)
            db.session.commit()

Next, I change the order status to accepted and redirect to the function displaying all orders for a given contractor, i.e.

        contract = Contract.query.get(id)
        contract.status = 'accepted'
        db.session.commit()
        page = session.get('page')
        per_page = session.get('per_page')
        return redirect(url_for('contracts.contracts', page=page, per_page=per_page))

If the page is loaded using the GET method, then depending on whether the booking is already present in the system or is being created, the data is completed in the form, i.e. if the booking is available and we open it, e.g. to edit the data, then the form will be completed with the previous one entered data. However, if the reservation is just being created, the form will contain a modification, the dates of which cannot be selected, because they have already been selected by other suppliers, i.e.

    if result is not None:
        reserved_booking_time = [times[0] for times in 
            Booking.query.with_entities(Booking.booking_time).filter(
            Booking.contract_id.in_(contracts)).all() if times[0]!=result.booking_time]   
        form.booking_time.data = result.booking_time
        form.driver_full_name.data = result.driver_full_name
        form.driver_phone_number.data = result.driver_phone_number
        form.truck_reg_number.data = result.truck_reg_number   
    else:
        reserved_booking_time = [times[0] for times in 
            Booking.query.with_entities(Booking.booking_time).filter(
            Booking.contract_id.in_(contracts)).all()] 
    return render_template('booking.html', form=form, reserved_booking_time=reserved_booking_time)

The complete code for the new_booking() function is available here.

A contract accepted in this way can only be edited by the customer. However, if the contract has already been accepted and the booking has been made, the change of the contract changes the status of the order from accepted back to open and requires the contractor to accept the changes again (but the entered data are present and the reservation is does not require data to be repeated).

The contractor may also download a pdf of the booking if the contract is accepted (for open contract the option to download pdf is inactive).

Reservation made – now you can generate pdf

Reportlab – generating and downloading pdf in Flask without creating the file on the server

Solved problem: how to generate pdf with order data and reservation barcode without saving the file on disk, which can then be downloaded by the website user in Flask.

We can use the Canvas object to embed document elements such as texts, tables, etc. As an argument we can give the name of the created pdf, but in this case I will use the buffer instead of creating the file on the server. I’m explicitly entering the page size as A4.

from io import BytesIO
from reportlab.pdfgen.canvas import Canvas
from reportlab.lib.pagesizes import A4

buffer = BytesIO()
canvas = Canvas(buffer, pagesize=A4)

To use non-standard characters, register the appropriate fonts that support the required characters. In the sample pdf file, I used the Vera font.

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

pdfmetrics.registerFont(TTFont('Vera', 'Vera.ttf'))

The registered font can now be used:

canvas.setFont("Vera", size=10)

I place the images (in this case, the reservation barcode) generated on the basis of the data from the application in the document being created using the drawImage () method, which takes the ImageReader object as the first argument, i.e.

im = ImageReader(image)
canvas.drawImage(im, x=0, y=-5*cm, width=150, height=100)

A list of text strings can be added to a document using a text object, i.e.

txt_obj = canvas.beginText(14, -6.5 * cm)
txt_lst = ["line of text 1", "line of text 2", "line of text 3"]
for line in txt_lst:
        txt_obj.textOut(line)
        txt_obj.moveCursor(0, 16)
canvas.drawText(txt_obj)

A table can be added to a document as a Table class object. I separately define a list containing the data of individual table rows (table_data variable). I also define styles that are valid for all or part of the table.

t = Table(table_data, colWidths=[60, 230, 70, 60, 50], rowHeights=30)
style = [('BACKGROUND',(0,0),(-1,-2),colors.lightblue),
            ('ALIGN',(0,-1),(-1,-1),'CENTER'),
            ('BOX', (0,0), (-1,-1), 0.25, colors.black),
            ('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
            ('FONTSIZE', (0,0), (-1,-1), 10),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE')]
t.setStyle(tblstyle=style)
t.wrapOn(canvas, 10, 10)
    t.drawOn(canvas=canvas, x=20, y=-22*cm)

Then I save the created Canvas object and return the resulting buffer to the controller. In order to send a pdf file based on data from the buffer, I use the send_file () Flask function:

from flask.helpers import send_file

@bp.route('/get-pdf/<int:id>', methods=['GET'])
@login_required
def get_pdf(id):
    contract = Contract.query.get(id)
    contractor = User.query.get(contract.contractor_id)
    booking = Booking.query.filter_by(contract_id=contract.id).first()
    pdf = create_pdf(booking_no=booking.id,
                        contractor=contractor.username,
                        contractor_no=contractor.id,
                        truck_plate=booking.truck_reg_number,
                        warehouse=contract.warehouse,
                        date=contract.date_of_delivery,
                        time=booking.booking_time,
                        pallets_pos=contract.pallets_position,
                        pallets=contract.pallets_actual)
    pdf.seek(0)
    return send_file(pdf, as_attachment=True, mimetype='application/pdf',
        attachment_filename='booking.pdf', cache_timeout=0)
sample file generated on the server side, downloaded by the contractor.

Time slot management system using Flask #2

The source code of the application can be downloaded here.

The plan for the application described in the previous article looks like this:

├───flask_contracts
│ ├───api
│ ├───auth
│ │ ├───static
│ │ │ └───logos
│ │ ├───templates
│ │ │ └───auth
│ │ ├───utils
│ │ │ ├───templates
│ │ │ │ └───utils
│ ├───contracts
│ │ ├───static
│ │ ├───templates
│ │ │ └───contracts
│ │ │ └───errors
│ │ ├───utils
│ ├───static
│ ├───templates
├───tests
│ ├───functional
│ ├───unit
└───venv

Each part of the application is a separate entity. A similar solution is used in Django. I wrote the manage.py file to manage the application. Each component has its own template subdirectories – a convention similar to Django’s.

Time slot management system using Flask

Thanks to the application: downtime is minimized and warehouse operation is optimized, which will reduce costs.

The application allows you to manage delivery times to central warehouses and logistics centers.

The source code of the application can be downloaded here.

The project will include an application in Flask (using the css – Bulma framework).

Customers and contractors of supplies will be able to log in to the application (verification of the account by e-mail to the address provided).

Only customers of deliveries will be able to issue orders, while contractors will be able to reserve available delivery appointments and generate a pdf document with a barcode.

part 2

Transfer of values ​​to and from modal (Bulma framework) in Flask

Solved problem: how to pass a value to a modal window and how to pass a value from a modal window to an application in Flask.

In the presented case, each row of the html table contains a button that enables canceling the contract. The data for each row is passed from the controller using the variable contracts:

result = Contract.query.filter_by(
            customer_id=current_user.id).filter(
                Contract.status!='cancelled').order_by(
                    Contract.id.desc()).paginate(page=page, per_page=per_page)
return render_template('contracts.html', title='Contracts', header=columns, contracts=result)

The data for the rows is available in the template:

{% for contract in contracts.items %}
    <tr>
        <td>{{  loop.index  }}</td>
        {% for item in header %}
            <td>
                 {{ contract[item] }}
            </td>
        {% endfor %}
    </tr>
{% endfor %}

In addition to the data sent from the controller, for each row of the table, a button should be created that activates the appropriate modal – differing in the value of the data-target parameter, i.e.

<button class="button mr-2 modal-button" data-target="modal{{contract.id}}"  title="Cancel Contract">

The value of the data-target parameter will vary for each row depending on the value of contract.id, which is unique – the Contract table key.

Depending on the value of the data-target parameter, the modal with the same value of the id parameter is launched, i.e.

<div id="modal{{contract.id}}" class="modal">

The modal contains a button to approve changes, in this case to cancel the contract. To pass the contract to be canceled, the contract id should be passed as a parameter to the url_for () function, i.e.

<form action="{{ url_for('contracts.cancel_contract', id=contract.id) }}">
            <button class="button is-danger is-rounded" formmethod="POST">Yes</button>
</form>

Passing SQL table column names to jinja2 (Flask) view

Solved issue: how to insert sql table column headers into template in Flask.

First, I will create a model – using sqlalchemy, I create a class that inherits from the model class. The attributes of this class correspond to the column names of the database table. An example of a Contract class declaration would look like this:

class Contract(db.Model):

Then in the controller I pass the column names from the model created in this way to the template:

@bp.route('/contract', methods=['GET'])
@login_required
def contracts(page=1, per_page=5):
    columns = = [m.key for m in Contract.__table__.columns]
   # some other code here, not important for this example
    return render_template('contracts.html', title='Contracts', header=columns)

Data from the header passed to the template can be e.g. placed in the header of the html table:

<thead>
    <tr>
        {% for item in header %}
            <th class="is-info">
                {{ item }}
             </th>
        {% endfor %}
     </tr>
 </thead>

manage.py for Flask

My tool for creating a database, administrator account and server startup.

manage.py code:

from sys import argv
from getpass import getpass
from app_name import create_app, db, bcrypt
from app_name.config import Config, ConfigProduction, ConfigTesting
from app_name.auth.models import User

app = create_app(Config) 


def create_db():
    app.app_context().push()
    db.create_all()

def create_admin():
    app.app_context().push()
    username = input('Enter admin name: ')
    email = input('Enter admin email: ')
    password = getpass('Enter admin password: ')
    confirm_password = getpass('Retype admin password: ')
    # Simple validation here:
    if '' in (username, email, password, confirm_password): raise ValueError('Empty value occurred')
    if password != confirm_password: raise ValueError("Passwords don't match")
    if User.query.filter_by(username=username).first(): raise ValueError('User already exists in db')

    hashed = bcrypt.generate_password_hash(password).decode('utf-8')
    admin = User(username=username, email=email, password=hashed, admin=True, confirmed=True)
    db.session.add(admin)
    db.session.commit()


if __name__ == '__main__':
    if len(argv) == 2:
        _, command = argv
        if command == 'run': app.run()
        if command == 'create_db': create_db()
        if command == 'create_admin': create_admin()

    else:
        print('Invalid command.')

Refueling report from xlsx file to Pandas + localized string resources

The source files of the program along with a sample report from a gas station can be found >> here <<

The Reports class has three methods: __init__(), load_resources(), and my_parser().

The __init__() method reads all files in the Data directory that have an xlsx extension. The last added file from this file list is loaded – the pandas frame object is created.

In the load_resources() method, I load localized strings depending on the user’s locale. If there is no appropriate file, the version with messages in English is loaded.

In the my_parser () method, I sum up the collected fuel with the division into vehicles. In case of discrepancies with the source documents, you can view a detailed list of refueling for a specific vehicle giving part of the registration number.

script code: main.py

import os
import glob
import locale
import calendar
import pandas as pd
import importlib_resources


class Reports:
	
    def __init__(self):
        self.load_resources()
        script_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.join(script_dir, 'Data')
        list_of_files = glob.glob(os.path.join(data_dir, '*.xlsx'))
        latest_file = max(list_of_files, key=os.path.getctime)
        path_to_file = os.path.join(data_dir, latest_file)
        self.df = pd.read_excel(path_to_file)
        self.my_parser()
    
    def load_resources(self):
        self.locale, encoding = locale.getdefaultlocale()
        r = importlib_resources.files('Resources')
        try:
            r_strings = (r / f'strings_{self.locale}.txt').read_text(encoding='utf-8').splitlines()
        except:
            r_strings = (r / f'strings_en_US.txt').read_text(encoding='utf-8').splitlines()
            self.locale = 'en_US'
        self.r_str = dict(x.split(':') for x in r_strings)


    
    def my_parser(self):
        os.system('cls' if os.name == 'nt' else 'clear')
        print()
        f_date = self.df['Date'].min()
        l_date = self.df['Date'].max()

        with calendar.different_locale(self.locale):
            print(self.r_str['title'], f'({f_date.day} -  {l_date.day} {calendar.month_abbr[int(l_date.month)]} {l_date.year})')
        self.df['Amount'] = self.df['Amount'].astype(str)
        self.df['Amount'] = self.df['Amount'].str.replace(',', '.')
        self.df['Amount'] = self.df['Amount'].astype(float)
        print()
        print(self.r_str['fuel_total'], self.df['Amount'].sum())
        cars_group = self.df.groupby('Registration number')
        print()
        # print(cars_group['Amount'].sum()) #standard output
        for item in zip(cars_group.groups, cars_group['Amount'].sum().values):
            name, value = item
            print(f'{name}    {value}')


        print()
        while True:
            choice = input(self.r_str['choice'] + ': ')
            os.system('cls' if os.name == 'nt' else 'clear')
            if choice == '/q': raise SystemExit
            if choice == '/m': self.my_parser()
            for key, value in cars_group['Amount']:
                if choice.upper() in key:
                    print(self.r_str['refueling'], key)
                    total = 0
                    for i, v in zip(value.index, value.values):
                        print(f"{self.df.loc[i, 'Date']}   {v}")
                        total +=v
                    print(self.r_str['ref_sum'] + f' {key}: {total}')
                    print()


if __name__ == "__main__":
    reports = Reports()

XML file to pandas DataFrame object – minidom

In this article, I will describe how to load data from an XML file into a DataFrIn the created document, I search for a list of elements from the XML file named person and, in turn, I retrieve the value of the id attribute for each of them, which I save in the persons dictionary.ame object using minidom module.

Project files are available for download >>here<<

The same XML file as described in the previous post is used.

In addition to xml.dom.minidom, I also use the pandas module and the defaultdict class from the collections module.

Using the context manager, I open the XML file, which is loaded and from which the DOM document is created. Moreover, for each child tag, ie position, first_name, last_name, etc., I save its value.

The persons dictionary is as an argument when creating a DataFrame object.

main.py source code:

import xml.dom.minidom
from collections import defaultdict
import pandas as pd

persons = defaultdict(list)
with xml.dom.minidom.parse(open('persons.xml')) as tree:
    persons_list = tree.getElementsByTagName('person')
    for person in persons_list:
        persons['id'].append(person.getAttribute('id'))
        for tag in ('position', 'first_name', 'last_name', 'email', 'salary'):
            persons[tag].append(person.getElementsByTagName(tag)[0].firstChild.data)


df = pd.DataFrame(persons, columns=persons.keys()).set_index('id')
df['salary'] = df['salary'].astype(float)
print(df.sort_values(by='salary', ascending=False))

XML file to pandas DataFrame object – sax

In this article, I will describe how to load data from an XML file into a DataFrame object using xml.sax module.

Project files are available for download >>here<<

The same XML file as described in the previous post is used.

In addition to xml.sax, I also use the pandas module and the defaultdict class from the collections module.

The first step in reading data from an XML file using SAX is to implement your own class that inherits from the ContentHandler() class. The handler class overrides three methods of the base class: the startElement() method, which is called when reading the next tag starts, characters () method, which reads the values ​​stored for a particular element and the endElement () method, which is called when reading the corresponding tag has finished. In addition, in the init () method, I create an instance of the dictionary in which the data read from the XML file will be saved.

In the startElement () method, I define a class attribute called tag, and if the processed tag is ‘person’ I write the id number for the person.

Then, in the characters() method, I save the values ​​stored in each element as the appropriate class variables.

In the endElement () method, I save the retrieved values ​​to the persons dictionary.

Parsing the XML file with the xml.sax module is done by calling the make_parser() method, which returns an instance of the parser. Then the handler instance is assigned to the created parser as an argument to the setContentHandler function() – in this case the PersonsHandler class.

The parse () method is then called to parse the source XML document.

The persons variable holds the values ​​of the dictionary created by the handler, and that dictionary is passed as a parameter when creating a DataFrame.

main.py souce code:

import xml.sax
from collections import defaultdict
import pandas as pd

class PersonsHandler(xml.sax.ContentHandler):
    def __init__(self):
        self.persons = defaultdict(list)
    def startElement(self, tag, attr):
        self.tag = tag
        if tag == 'person':
            self.persons['id'].append(attr['id'])
            
    def characters(self, content):
        if content.strip():
            if self.tag == 'position': self.position = content
            elif self.tag == 'first_name': self.first_name = content
            elif self.tag == 'last_name': self.last_name = content
            elif self.tag == 'email': self.email = content
            elif self.tag == 'salary': self.salary = content
    
    def endElement(self, tag):
        if tag == 'position': self.persons['position'].append(self.position)
        elif tag == 'first_name': self.persons['first_name'].append(self.first_name)
        elif tag == 'last_name': self.persons['last_name'].append(self.last_name)
        elif tag == 'email': self.persons['email'].append(self.email)
        elif tag == 'salary': self.persons['salary'].append(self.salary)

parser = xml.sax.make_parser()
parser.setContentHandler(PersonsHandler())
parser.parse(open('persons.xml'))
persons = parser.getContentHandler().persons


df = pd.DataFrame(persons, columns=persons.keys()).set_index('id')
df['salary'] = df['salary'].astype(float)
print(df.sort_values(by='salary', ascending=False))

XML file to pandas DataFrame object – ElementTree

In this article, I will describe how to load data from an XML file into a DataFrame object using xml.etree.ElementTree.

Project files are available for download >>here<<

The sample XML file will describe people employed in the company and will have the following form:

<persons>
    <person id="">
        <position></position>
        <first_name></first_name>
        <last_name></last_name>
        <email></email>
        <salary></salary>
    </person>
</persons>

The following data is saved for each person with a unique id attribute: position, name, surname, email address and salary.

First, I import the necessary modules, ie xml.etree.ElementTree to parse the XML document. From the collections module I import defaultdict, which will store lists containing first names, surnames, salaries, etc., which I then pass as an argument of the DataFrame class.

import xml.etree.ElementTree as et
from collections import defaultdict
import pandas as pd

In the next line, I create a dictionary that will store employee data obtained from the XML file:

persons = defaultdict(list)

Then I load the XML file and download the parent element – root – persons, and then for each child element – person I get the value of its id attribute, as well as the values ​​of its elements: position, first_name, last_name, etc. Each of these values ​​is added as the next element of the appropriate dictionary list.

tree = et.parse("persons.xml")
root = tree.getroot()
for child in root:
    id = child.attrib.get('id')
    position = child.find('position').text
    first_name = child.find('first_name').text
    last_name = child.find('last_name').text
    email = child.find('email').text
    salary = child.find('salary').text
    
    persons['id'].append(id)
    persons['position'].append(position)
    persons['first_name'].append(first_name)
    persons['last_name'].append(last_name)
    persons['email'].append(email)
    persons['salary'].append(salary)

I pass the created dictionary as an argument to the DataFrame object being created, with the names of the dictionary keys as the names of the columns and the id column as the DataFrame index.

Then I change the data type of the salary column to float to sort the frame in order of decreasing values ​​of that column. Alternatively, the conversion to float could be done when creating the DataFrame object, additionally providing the dtype argument.

Color maps in matplotlib

To create a scatter chart that uses a selected color palette, enter the name of the selected color scheme as the cmap argument.

The colors are grouped (see documentation). For instance: Sequential – different intensity of one color, Diverging – different intensity of two contrasting colors, Qualitative – different contrasting colors, but matching the palette, e.g Pastel1 – containing different pastel colors.

import matplotlib.pyplot as plt

fig, ax = plt.subplots()

ax.set_title('Chart title')
ax.set_xlabel('X axis label')
ax.set_ylabel('Y axis label')

# x - list with X axis values, e.g. product name
# y - list with Y axis values, e.g. product price
# intensivity - a list with numbers corresponding to the intensity of the attribute
# cm - name of the selected color palette

ax.scatter(x, y, c=intensivity,  cmap=cm)  
  # e.g: plt.scatter(x, y, c=intensivity, s=50,  cmap='plasma')

mappable = ax.collections[0]
cbar = fig.colorbar(mappable=mappable)
cbar.set_label('intensivity')

plt.show()

The s parameter is the size of drawn characters.

To display a colorbar that shows the intensity of a given feature for the displayed point, the mappable object should be specified as a parameter, which for the scatter plot is stored as an element of the AxesSubplot collections list.

As a result, on the chart, which may show, for example, the name of the product and the price, you can add additional information using the color, e.g. popularity among buyers or the quantity of goods in the warehouse (e.g. green – the product is available without any problems, up to red – no goods in stock) )