def add_item_to_sale(self):
    """Look up the scanned barcode and add the matching drug to the sale.

    Triggered by the barcode input's returnPressed signal (a USB scanner
    types the code and sends Enter). Warns and aborts when the barcode is
    unknown or the drug is out of stock; otherwise adds the drug to (or
    increments it in) ``self.current_sale_items`` and refreshes the table.
    """
    barcode = self.barcode_input.text()
    if not barcode:
        return

    drug = db.find_drug_by_barcode(barcode)
    if not drug:
        QMessageBox.warning(self, "Not Found", "No drug found with this barcode.")
        self.barcode_input.clear()
        return

    # Drug row layout (assumed from usage): [0]=id, [1]=name,
    # [3]=stock quantity, [4]=unit price -- TODO confirm against db schema.
    drug_id = drug[0]
    stock_quantity = drug[3]
    if stock_quantity <= 0:
        QMessageBox.warning(self, "Out of Stock", f"{drug[1]} is out of stock.")
        self.barcode_input.clear()
        return

    in_cart = self.current_sale_items.get(drug_id)
    if in_cart is not None:
        # Fix: cap the cart quantity at the available stock, otherwise
        # completing the sale could drive inventory negative.
        if in_cart['quantity'] >= stock_quantity:
            QMessageBox.warning(
                self, "Out of Stock",
                f"Only {stock_quantity} of {drug[1]} in stock."
            )
            self.barcode_input.clear()
            return
        in_cart['quantity'] += 1
    else:
        # First scan of this drug: start it at quantity 1.
        self.current_sale_items[drug_id] = {
            'data': drug,
            'quantity': 1,
        }

    self.update_sales_table()
    self.barcode_input.clear()
def update_sales_table(self):
    """Rebuild the sales table from ``self.current_sale_items`` and
    refresh the running-total label.

    Columns: 0=ID, 1=Name, 2=Quantity, 3=Unit price, 4=Line total.
    """
    cart = self.current_sale_items
    self.sales_table.setRowCount(len(cart))

    grand_total = 0.0
    for row_index, entry in enumerate(cart.values()):
        record = entry['data']
        qty = entry['quantity']
        price_each = record[4]
        line_total = qty * price_each
        grand_total += line_total

        cells = (
            str(record[0]),       # drug ID
            record[1],            # drug name
            str(qty),             # quantity in cart
            f"{price_each:.2f}",  # unit price
            f"{line_total:.2f}",  # line total
        )
        for col, text in enumerate(cells):
            self.sales_table.setItem(row_index, col, QTableWidgetItem(text))

    self.total_amount_label.setText(f"{grand_total:.2f}")
def complete_sale(self):
    """Finalize the current sale: persist sold quantities, notify the
    user, reset the POS state, and refresh the inventory view.

    No-op when the cart is empty.
    """
    if not self.current_sale_items:
        return

    # Write each sold quantity back to the database; presumably this
    # decrements stock -- TODO confirm db.update_drug_quantity semantics.
    for drug_id, entry in self.current_sale_items.items():
        db.update_drug_quantity(drug_id, entry['quantity'])

    total_text = self.total_amount_label.text()
    QMessageBox.information(self, "Success", f"Sale completed. Total: {total_text}")

    self.clear_sale()
    self.load_inventory_data()  # Refresh inventory tab to show new quantities
def clear_sale(self):
    """Empty the cart and redraw the (now empty) sales table.

    Also resets the total label, since update_sales_table recomputes it
    from the cleared cart.
    """
    self.current_sale_items.clear()
    self.update_sales_table()
#Hashtags: #BusinessLogic #PointOfSale #PythonCode #Transaction
---
#Step 5: Results and Discussion
With all the code in place, you have a fully functional pharmacy management system.
How to Use It:
• Run the main.py script.
• Go to the "Inventory Management" tab and add a few drugs with unique barcodes.
• Go to the "Point of Sale" tab. The cursor will be in the barcode input field.
• Type a barcode of a drug you added and press Enter. The drug will appear in the sales table.
• Scan the same barcode again. The quantity for that drug in the sales table will increase to 2.
• Click "Complete Sale". A success message will appear. The sales table will clear.
• Switch back to the "Inventory Management" tab. You will see that the quantity of the sold drugs has decreased accordingly.
âĪ2
Discussion and Potential Improvements:
Real Barcode Scanner: This application works directly with a USB barcode scanner. A scanner acts as a keyboard, so when it scans a code, it types the numbers and sends an "Enter" keystroke, which perfectly triggers our returnPressed signal.
Data Integrity: We added a basic check for stock (quantity > 0). A more robust system would check if the quantity in the cart exceeds the quantity in stock before allowing the sale to complete.
Features for a Real Pharmacy: A production-level system would need many more features: prescription management, patient records, batch tracking for recalls, advanced reporting (e.g., top-selling drugs, low-stock alerts), user accounts with different permission levels, and receipt printing.
Database: SQLite is perfect for a single-user, standalone application. For a pharmacy with multiple terminals, a client-server database like PostgreSQL or MySQL would be necessary.
This project provides a solid foundation, demonstrating how to integrate hardware (like a barcode scanner) with a database-backed desktop application to solve a real-world business problem.
#ProjectComplete #SoftwareEngineering #PythonGUI #HealthTech
âââââââââââââââ
By: @DataScienceN âĻ
Real Barcode Scanner: This application works directly with a USB barcode scanner. A scanner acts as a keyboard, so when it scans a code, it types the numbers and sends an "Enter" keystroke, which perfectly triggers our
returnPressed signal.Data Integrity: We added a basic check for stock (
quantity > 0). A more robust system would check if the quantity in the cart exceeds the quantity in stock before allowing the sale to complete.Features for a Real Pharmacy: A production-level system would need many more features: prescription management, patient records, batch tracking for recalls, advanced reporting (e.g., top-selling drugs, low-stock alerts), user accounts with different permission levels, and receipt printing.
Database: SQLite is perfect for a single-user, standalone application. For a pharmacy with multiple terminals, a client-server database like PostgreSQL or MySQL would be necessary.
This project provides a solid foundation, demonstrating how to integrate hardware (like a barcode scanner) with a database-backed desktop application to solve a real-world business problem.
#ProjectComplete #SoftwareEngineering #PythonGUI #HealthTech
âââââââââââââââ
By: @DataScienceN âĻ
âĪ3
ðĨ Trending Repository: nano-vllm
ð Description: Nano vLLM
ð Repository URL: https://github.com/GeeeekExplorer/nano-vllm
ð Readme: https://github.com/GeeeekExplorer/nano-vllm#readme
ð Statistics:
ð Stars: 7.4K stars
ð Watchers: 62
ðī Forks: 949 forks
ðŧ Programming Languages: Python
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: Nano vLLM
ð Repository URL: https://github.com/GeeeekExplorer/nano-vllm
ð Readme: https://github.com/GeeeekExplorer/nano-vllm#readme
ð Statistics:
ð Stars: 7.4K stars
ð Watchers: 62
ðī Forks: 949 forks
ðŧ Programming Languages: Python
ð·ïļ Related Topics:
#nlp #deep_learning #inference #pytorch #transformer #llm
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: glow
ð Description: Render markdown on the CLI, with pizzazz! ð ðŧ
ð Repository URL: https://github.com/charmbracelet/glow
ð Readme: https://github.com/charmbracelet/glow#readme
ð Statistics:
ð Stars: 19.9K stars
ð Watchers: 75
ðī Forks: 480 forks
ðŧ Programming Languages: Go - Dockerfile
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: Render markdown on the CLI, with pizzazz! ð ðŧ
ð Repository URL: https://github.com/charmbracelet/glow
ð Readme: https://github.com/charmbracelet/glow#readme
ð Statistics:
ð Stars: 19.9K stars
ð Watchers: 75
ðī Forks: 480 forks
ðŧ Programming Languages: Go - Dockerfile
ð·ïļ Related Topics:
#markdown #cli #hacktoberfest #excitement
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: hacker-scripts
ð Description: Based on a true story
ð Repository URL: https://github.com/NARKOZ/hacker-scripts
ð Readme: https://github.com/NARKOZ/hacker-scripts#readme
ð Statistics:
ð Stars: 49K stars
ð Watchers: 2.1k
ðī Forks: 6.7K forks
ðŧ Programming Languages: JavaScript - Python - Java - Perl - Kotlin - Clojure
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceM
ð Description: Based on a true story
ð Repository URL: https://github.com/NARKOZ/hacker-scripts
ð Readme: https://github.com/NARKOZ/hacker-scripts#readme
ð Statistics:
ð Stars: 49K stars
ð Watchers: 2.1k
ðī Forks: 6.7K forks
ðŧ Programming Languages: JavaScript - Python - Java - Perl - Kotlin - Clojure
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: moon-dev-ai-agents
ð Description: autonomous ai agents for trading in python
ð Repository URL: https://github.com/moondevonyt/moon-dev-ai-agents
ð Website: https://algotradecamp.com
ð Readme: https://github.com/moondevonyt/moon-dev-ai-agents#readme
ð Statistics:
ð Stars: 2.2K stars
ð Watchers: 100
ðī Forks: 1.1K forks
ðŧ Programming Languages: Python - HTML
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceM
ð Description: autonomous ai agents for trading in python
ð Repository URL: https://github.com/moondevonyt/moon-dev-ai-agents
ð Website: https://algotradecamp.com
ð Readme: https://github.com/moondevonyt/moon-dev-ai-agents#readme
ð Statistics:
ð Stars: 2.2K stars
ð Watchers: 100
ðī Forks: 1.1K forks
ðŧ Programming Languages: Python - HTML
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: agenticSeek
ð Description: Fully Local Manus AI. No APIs, No $200 monthly bills. Enjoy an autonomous agent that thinks, browses the web, and code for the sole cost of electricity. ð Official updates only via twitter @Martin993886460 (Beware of fake account)
ð Repository URL: https://github.com/Fosowl/agenticSeek
ð Website: http://agenticseek.tech
ð Readme: https://github.com/Fosowl/agenticSeek#readme
ð Statistics:
ð Stars: 22.4K stars
ð Watchers: 132
ðī Forks: 2.4K forks
ðŧ Programming Languages: Python - JavaScript - CSS - Shell - Batchfile - HTML - Dockerfile
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: Fully Local Manus AI. No APIs, No $200 monthly bills. Enjoy an autonomous agent that thinks, browses the web, and code for the sole cost of electricity. ð Official updates only via twitter @Martin993886460 (Beware of fake account)
ð Repository URL: https://github.com/Fosowl/agenticSeek
ð Website: http://agenticseek.tech
ð Readme: https://github.com/Fosowl/agenticSeek#readme
ð Statistics:
ð Stars: 22.4K stars
ð Watchers: 132
ðī Forks: 2.4K forks
ðŧ Programming Languages: Python - JavaScript - CSS - Shell - Batchfile - HTML - Dockerfile
ð·ïļ Related Topics:
#ai #agents #autonomous_agents #voice_assistant #llm #llm_agents #agentic_ai #deepseek_r1
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: LinkSwift
ð Description: äļäļŠåšäš JavaScript įį―įæäŧķäļč――å°åč·åå·Ĩå ·ãåšäšãį―įįīéūäļč――åĐæãäŋŪæđ ïžæŊæ įūåšĶį―į / éŋéäšį / äļå―į§ŧåĻäšį / åĪĐįŋžäšį / čŋ é·äšį / åĪļå į―į / UCį―į / 123äšį å ŦåΧį―į
ð Repository URL: https://github.com/hmjz100/LinkSwift
ð Website: https://github.com/hmjz100/LinkSwift/raw/main/%EF%BC%88%E6%94%B9%EF%BC%89%E7%BD%91%E7%9B%98%E7%9B%B4%E9%93%BE%E4%B8%8B%E8%BD%BD%E5%8A%A9%E6%89%8B.user.js
ð Readme: https://github.com/hmjz100/LinkSwift#readme
ð Statistics:
ð Stars: 7.9K stars
ð Watchers: 26
ðī Forks: 371 forks
ðŧ Programming Languages: JavaScript
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: äļäļŠåšäš JavaScript įį―įæäŧķäļč――å°åč·åå·Ĩå ·ãåšäšãį―įįīéūäļč――åĐæãäŋŪæđ ïžæŊæ įūåšĶį―į / éŋéäšį / äļå―į§ŧåĻäšį / åĪĐįŋžäšį / čŋ é·äšį / åĪļå į―į / UCį―į / 123äšį å ŦåΧį―į
ð Repository URL: https://github.com/hmjz100/LinkSwift
ð Website: https://github.com/hmjz100/LinkSwift/raw/main/%EF%BC%88%E6%94%B9%EF%BC%89%E7%BD%91%E7%9B%98%E7%9B%B4%E9%93%BE%E4%B8%8B%E8%BD%BD%E5%8A%A9%E6%89%8B.user.js
ð Readme: https://github.com/hmjz100/LinkSwift#readme
ð Statistics:
ð Stars: 7.9K stars
ð Watchers: 26
ðī Forks: 371 forks
ðŧ Programming Languages: JavaScript
ð·ïļ Related Topics:
#userscript #tampermonkey #aria2 #baidu #baiduyun #tampermonkey_script #baidunetdisk #tampermonkey_userscript #baidu_netdisk #motrix #aliyun_drive #123pan #189_cloud #139_cloud #xunlei_netdisk #quark_netdisk #ali_netdisk #yidong_netdisk #tianyi_netdisk #uc_netdisk
==================================
ð§ By: https://t.me/DataScienceM
Forwarded from Kaggle Data Hub
Unlock premium learning without spending a dime! âïļ @DataScienceC is the first Telegram channel dishing out free Udemy coupons dailyâgrab courses on data science, coding, AI, and beyond. Join the revolution and boost your skills for free today! ð
What topic are you itching to learn next?ð
https://t.me/DataScienceCð
What topic are you itching to learn next?
https://t.me/DataScienceC
Please open Telegram to view this post
VIEW IN TELEGRAM
âĪ3
ðĨ Trending Repository: pytorch
ð Description: Tensors and Dynamic neural networks in Python with strong GPU acceleration
ð Repository URL: https://github.com/pytorch/pytorch
ð Website: https://pytorch.org
ð Readme: https://github.com/pytorch/pytorch#readme
ð Statistics:
ð Stars: 94.5K stars
ð Watchers: 1.8k
ðī Forks: 25.8K forks
ðŧ Programming Languages: Python - C++ - Cuda - C - Objective-C++ - CMake
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: Tensors and Dynamic neural networks in Python with strong GPU acceleration
ð Repository URL: https://github.com/pytorch/pytorch
ð Website: https://pytorch.org
ð Readme: https://github.com/pytorch/pytorch#readme
ð Statistics:
ð Stars: 94.5K stars
ð Watchers: 1.8k
ðī Forks: 25.8K forks
ðŧ Programming Languages: Python - C++ - Cuda - C - Objective-C++ - CMake
ð·ïļ Related Topics:
#python #machine_learning #deep_learning #neural_network #gpu #numpy #autograd #tensor
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: LocalAI
ð Description: ðĪ The free, Open Source alternative to OpenAI, Claude and others. Self-hosted and local-first. Drop-in replacement for OpenAI, running on consumer-grade hardware. No GPU required. Runs gguf, transformers, diffusers and many more. Features: Generate Text, Audio, Video, Images, Voice Cloning, Distributed, P2P and decentralized inference
ð Repository URL: https://github.com/mudler/LocalAI
ð Website: https://localai.io
ð Readme: https://github.com/mudler/LocalAI#readme
ð Statistics:
ð Stars: 36.4K stars
ð Watchers: 241
ðī Forks: 2.9K forks
ðŧ Programming Languages: Go - HTML - Python - JavaScript - Shell - C++
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: ðĪ The free, Open Source alternative to OpenAI, Claude and others. Self-hosted and local-first. Drop-in replacement for OpenAI, running on consumer-grade hardware. No GPU required. Runs gguf, transformers, diffusers and many more. Features: Generate Text, Audio, Video, Images, Voice Cloning, Distributed, P2P and decentralized inference
ð Repository URL: https://github.com/mudler/LocalAI
ð Website: https://localai.io
ð Readme: https://github.com/mudler/LocalAI#readme
ð Statistics:
ð Stars: 36.4K stars
ð Watchers: 241
ðī Forks: 2.9K forks
ðŧ Programming Languages: Go - HTML - Python - JavaScript - Shell - C++
ð·ïļ Related Topics:
#api #ai #mcp #decentralized #text_generation #distributed #tts #image_generation #llama #object_detection #mamba #libp2p #gemma #mistral #audio_generation #llm #stable_diffusion #rwkv #musicgen #rerank
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: PageIndex
ð Description: ðð§ PageIndex: Document Index for Reasoning-based RAG
ð Repository URL: https://github.com/VectifyAI/PageIndex
ð Website: https://pageindex.ai
ð Readme: https://github.com/VectifyAI/PageIndex#readme
ð Statistics:
ð Stars: 3.1K stars
ð Watchers: 24
ðī Forks: 243 forks
ðŧ Programming Languages: Python - Jupyter Notebook
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: ðð§ PageIndex: Document Index for Reasoning-based RAG
ð Repository URL: https://github.com/VectifyAI/PageIndex
ð Website: https://pageindex.ai
ð Readme: https://github.com/VectifyAI/PageIndex#readme
ð Statistics:
ð Stars: 3.1K stars
ð Watchers: 24
ðī Forks: 243 forks
ðŧ Programming Languages: Python - Jupyter Notebook
ð·ïļ Related Topics:
#ai #retrieval #reasoning #rag #llm
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: opentui
ð Description: OpenTUI is a library for building terminal user interfaces (TUIs)
ð Repository URL: https://github.com/sst/opentui
ð Website: https://opentui.com
ð Readme: https://github.com/sst/opentui#readme
ð Statistics:
ð Stars: 3.3K stars
ð Watchers: 19
ðī Forks: 122 forks
ðŧ Programming Languages: TypeScript - Zig - Go - Tree-sitter Query - Shell - Vue
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceM
ð Description: OpenTUI is a library for building terminal user interfaces (TUIs)
ð Repository URL: https://github.com/sst/opentui
ð Website: https://opentui.com
ð Readme: https://github.com/sst/opentui#readme
ð Statistics:
ð Stars: 3.3K stars
ð Watchers: 19
ðī Forks: 122 forks
ðŧ Programming Languages: TypeScript - Zig - Go - Tree-sitter Query - Shell - Vue
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: awesome-rl-for-cybersecurity
ð Description: A curated list of resources dedicated to reinforcement learning applied to cyber security.
ð Repository URL: https://github.com/Kim-Hammar/awesome-rl-for-cybersecurity
ð Readme: https://github.com/Kim-Hammar/awesome-rl-for-cybersecurity#readme
ð Statistics:
ð Stars: 948 stars
ð Watchers: 32
ðī Forks: 137 forks
ðŧ Programming Languages: Not available
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceN
ð Description: A curated list of resources dedicated to reinforcement learning applied to cyber security.
ð Repository URL: https://github.com/Kim-Hammar/awesome-rl-for-cybersecurity
ð Readme: https://github.com/Kim-Hammar/awesome-rl-for-cybersecurity#readme
ð Statistics:
ð Stars: 948 stars
ð Watchers: 32
ðī Forks: 137 forks
ðŧ Programming Languages: Not available
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceN
âĪ1
ðĨ Trending Repository: How-To-Secure-A-Linux-Server
ð Description: An evolving how-to guide for securing a Linux server.
ð Repository URL: https://github.com/imthenachoman/How-To-Secure-A-Linux-Server
ð Readme: https://github.com/imthenachoman/How-To-Secure-A-Linux-Server#readme
ð Statistics:
ð Stars: 20.5K stars
ð Watchers: 339
ðī Forks: 1.3K forks
ðŧ Programming Languages: Not available
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: An evolving how-to guide for securing a Linux server.
ð Repository URL: https://github.com/imthenachoman/How-To-Secure-A-Linux-Server
ð Readme: https://github.com/imthenachoman/How-To-Secure-A-Linux-Server#readme
ð Statistics:
ð Stars: 20.5K stars
ð Watchers: 339
ðī Forks: 1.3K forks
ðŧ Programming Languages: Not available
ð·ïļ Related Topics:
#linux #security #server #hardening #security_hardening #linux_server #cc_by_sa #hardening_steps
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: edgevpn
ð Description: âĩ The immutable, decentralized, statically built p2p VPN without any central server and automatic discovery! Create decentralized introspectable tunnels over p2p with shared tokens
ð Repository URL: https://github.com/mudler/edgevpn
ð Website: https://mudler.github.io/edgevpn
ð Readme: https://github.com/mudler/edgevpn#readme
ð Statistics:
ð Stars: 1.3K stars
ð Watchers: 22
ðī Forks: 149 forks
ðŧ Programming Languages: Go - HTML
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: âĩ The immutable, decentralized, statically built p2p VPN without any central server and automatic discovery! Create decentralized introspectable tunnels over p2p with shared tokens
ð Repository URL: https://github.com/mudler/edgevpn
ð Website: https://mudler.github.io/edgevpn
ð Readme: https://github.com/mudler/edgevpn#readme
ð Statistics:
ð Stars: 1.3K stars
ð Watchers: 22
ðī Forks: 149 forks
ðŧ Programming Languages: Go - HTML
ð·ïļ Related Topics:
#kubernetes #tunnel #golang #networking #mesh_networks #ipfs #nat #blockchain #p2p #vpn #mesh #golang_library #libp2p #cloudvpn #ipfs_blockchain #holepunch #p2pvpn
==================================
ð§ By: https://t.me/DataScienceM
ðĨ Trending Repository: cs-self-learning
ð Description: čŪĄįŪæščŠåĶæå
ð Repository URL: https://github.com/PKUFlyingPig/cs-self-learning
ð Website: https://csdiy.wiki
ð Readme: https://github.com/PKUFlyingPig/cs-self-learning#readme
ð Statistics:
ð Stars: 68.5K stars
ð Watchers: 341
ðī Forks: 7.7K forks
ðŧ Programming Languages: HTML
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceM
ð Description: čŪĄįŪæščŠåĶæå
ð Repository URL: https://github.com/PKUFlyingPig/cs-self-learning
ð Website: https://csdiy.wiki
ð Readme: https://github.com/PKUFlyingPig/cs-self-learning#readme
ð Statistics:
ð Stars: 68.5K stars
ð Watchers: 341
ðī Forks: 7.7K forks
ðŧ Programming Languages: HTML
ð·ïļ Related Topics: Not available
==================================
ð§ By: https://t.me/DataScienceM
âĪ1
ðĄ Top 70 Web Scraping Operations in Python
I. Making HTTP Requests (
âĒ Import the library.
âĒ Make a GET request to a URL.
âĒ Check the response status code (200 is OK).
âĒ Access the raw HTML content (as bytes).
âĒ Access the HTML content (as a string).
âĒ Access response headers.
âĒ Send a custom User-Agent header.
âĒ Pass URL parameters in a request.
âĒ Make a POST request with form data.
âĒ Handle potential request errors.
II. Parsing HTML with
âĒ Import the library.
âĒ Create a
âĒ Prettify the parsed HTML for readability.
âĒ Access a tag directly by name (gets the first one).
âĒ Navigate to a tag's parent.
âĒ Get an iterable of a tag's children.
âĒ Get the next sibling tag.
âĒ Get the previous sibling tag.
III. Finding Elements with
I. Making HTTP Requests (
requests)âĒ Import the library.
import requests
âĒ Make a GET request to a URL.
response = requests.get('http://example.com')âĒ Check the response status code (200 is OK).
print(response.status_code)
âĒ Access the raw HTML content (as bytes).
html_bytes = response.content
âĒ Access the HTML content (as a string).
html_text = response.text
âĒ Access response headers.
print(response.headers)
âĒ Send a custom User-Agent header.
headers = {'User-Agent': 'My Cool Scraper 1.0'}
response = requests.get('http://example.com', headers=headers)âĒ Pass URL parameters in a request.
params = {'q': 'python scraping'}
response = requests.get('https://www.google.com/search', params=params)âĒ Make a POST request with form data.
payload = {'key1': 'value1', 'key2': 'value2'}
response = requests.post('http://httpbin.org/post', data=payload)âĒ Handle potential request errors.
try:
response = requests.get('http://example.com', timeout=5)
response.raise_for_status() # Raise an exception for bad status codes
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
II. Parsing HTML with
BeautifulSoup (Setup & Navigation)âĒ Import the library.
from bs4 import BeautifulSoup
âĒ Create a
BeautifulSoup object from HTML text.soup = BeautifulSoup(html_text, 'html.parser')
âĒ Prettify the parsed HTML for readability.
print(soup.prettify())
âĒ Access a tag directly by name (gets the first one).
title_tag = soup.title
âĒ Navigate to a tag's parent.
title_parent = soup.title.parent
âĒ Get an iterable of a tag's children.
for child in soup.head.children:
print(child.name)
âĒ Get the next sibling tag.
first_p = soup.find('p')
next_p = first_p.find_next_sibling('p')âĒ Get the previous sibling tag.
second_p = soup.find_all('p')[1]
prev_p = second_p.find_previous_sibling('p')III. Finding Elements with
BeautifulSoupâĒ Find the first occurrence of a tag.
âĒ Find all occurrences of a tag.
âĒ Find tags by their CSS class.
âĒ Find a tag by its ID.
âĒ Find tags by other attributes.
âĒ Find using a list of multiple tags.
âĒ Find using a regular expression.
âĒ Find using a custom function.
âĒ Limit the number of results.
âĒ Use CSS Selectors to find one element.
âĒ Use CSS Selectors to find all matching elements.
âĒ Select direct children using CSS selector.
IV. Extracting Data with
âĒ Get the text content from a tag.
âĒ Get stripped text content.
âĒ Get all text from the entire document.
âĒ Get an attribute's value (like a URL).
âĒ Get the tag's name.
âĒ Get all attributes of a tag as a dictionary.
V. Parsing with
âĒ Import the library.
âĒ Parse HTML content with
âĒ Select elements using an XPath expression.
âĒ Select text content directly with XPath.
âĒ Select an attribute value with XPath.
VI. Handling Dynamic Content (
âĒ Import the
âĒ Initialize a browser driver.
âĒ Navigate to a webpage.
âĒ Find an element by its ID.
âĒ Find elements by CSS Selector.
âĒ Find an element by XPath.
âĒ Click a button.
âĒ Enter text into an input field.
âĒ Wait for an element to become visible.
first_link = soup.find('a')âĒ Find all occurrences of a tag.
all_links = soup.find_all('a')âĒ Find tags by their CSS class.
articles = soup.find_all('div', class_='article-content')âĒ Find a tag by its ID.
main_content = soup.find(id='main-container')
âĒ Find tags by other attributes.
images = soup.find_all('img', attrs={'data-src': True})âĒ Find using a list of multiple tags.
headings = soup.find_all(['h1', 'h2', 'h3'])
âĒ Find using a regular expression.
import re
links_with_blog = soup.find_all('a', href=re.compile(r'blog'))
âĒ Find using a custom function.
# Finds tags with a 'class' but no 'id'
tags = soup.find_all(lambda tag: tag.has_attr('class') and not tag.has_attr('id'))
âĒ Limit the number of results.
first_five_links = soup.find_all('a', limit=5)âĒ Use CSS Selectors to find one element.
footer = soup.select_one('#footer > p')âĒ Use CSS Selectors to find all matching elements.
article_links = soup.select('div.article a')âĒ Select direct children using CSS selector.
nav_items = soup.select('ul.nav > li')IV. Extracting Data with
BeautifulSoupâĒ Get the text content from a tag.
title_text = soup.title.get_text()
âĒ Get stripped text content.
link_text = soup.find('a').get_text(strip=True)âĒ Get all text from the entire document.
all_text = soup.get_text()
âĒ Get an attribute's value (like a URL).
link_url = soup.find('a')['href']âĒ Get the tag's name.
tag_name = soup.find('h1').nameâĒ Get all attributes of a tag as a dictionary.
attrs_dict = soup.find('img').attrsV. Parsing with
lxml and XPathâĒ Import the library.
from lxml import html
âĒ Parse HTML content with
lxml.tree = html.fromstring(response.content)
âĒ Select elements using an XPath expression.
# Selects all <a> tags inside <div> tags with class 'nav'
links = tree.xpath('//div[@class="nav"]/a')
âĒ Select text content directly with XPath.
# Gets the text of all <h1> tags
h1_texts = tree.xpath('//h1/text()')
âĒ Select an attribute value with XPath.
# Gets all href attributes from <a> tags
hrefs = tree.xpath('//a/@href')
VI. Handling Dynamic Content (
Selenium)âĒ Import the
webdriver.from selenium import webdriver
âĒ Initialize a browser driver.
driver = webdriver.Chrome() # Requires chromedriver
âĒ Navigate to a webpage.
driver.get('http://example.com')âĒ Find an element by its ID.
element = driver.find_element('id', 'my-element-id')âĒ Find elements by CSS Selector.
elements = driver.find_elements('css selector', 'div.item')âĒ Find an element by XPath.
button = driver.find_element('xpath', '//button[@type="submit"]')âĒ Click a button.
button.click()
âĒ Enter text into an input field.
search_box = driver.find_element('name', 'q')
search_box.send_keys('Python Selenium')âĒ Wait for an element to become visible.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "myDynamicElement"))
)
âĒ Get the page source after JavaScript has executed.
dynamic_html = driver.page_source
âĒ Close the browser window.
driver.quit()
VII. Common Tasks & Best Practices
âĒ Handle pagination by finding the "Next" link.
next_page_url = soup.find('a', text='Next')['href']âĒ Save data to a CSV file.
import csv
with open('data.csv', 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(['Title', 'Link'])
# writer.writerow([title, url]) in a loop
âĒ Save data to CSV using
pandas.import pandas as pd
df = pd.DataFrame(data, columns=['Title', 'Link'])
df.to_csv('data.csv', index=False)
âĒ Use a proxy with
requests.proxies = {'http': 'http://10.10.1.10:3128', 'https': 'http://10.10.1.10:1080'}
requests.get('http://example.com', proxies=proxies)âĒ Pause between requests to be polite.
import time
time.sleep(2) # Pause for 2 seconds
âĒ Handle JSON data from an API.
json_response = requests.get('https://api.example.com/data').json()âĒ Download a file (like an image).
img_url = 'http://example.com/image.jpg'
img_data = requests.get(img_url).content
with open('image.jpg', 'wb') as handler:
handler.write(img_data)
âĒ Parse a
sitemap.xml to find all URLs.# Get the sitemap.xml file and parse it like any other XML/HTML to extract <loc> tags.
VIII. Advanced Frameworks (
Scrapy)âĒ Create a Scrapy spider (conceptual command).
scrapy genspider example example.com
âĒ Define a
parse method to process the response.# In your spider class:
def parse(self, response):
# parsing logic here
pass
âĒ Extract data using Scrapy's CSS selectors.
titles = response.css('h1::text').getall()âĒ Extract data using Scrapy's XPath selectors.
links = response.xpath('//a/@href').getall()âĒ Yield a dictionary of scraped data.
yield {'title': response.css('title::text').get()}âĒ Follow a link to parse the next page.
next_page = response.css('li.next a::attr(href)').get()
if next_page is not None:
yield response.follow(next_page, callback=self.parse)âĒ Run a spider from the command line.
scrapy crawl example -o output.json
âĒ Pass arguments to a spider.
scrapy crawl example -a category=books
âĒ Create a Scrapy Item for structured data.
import scrapy
class ProductItem(scrapy.Item):
name = scrapy.Field()
price = scrapy.Field()
âĒ Use an Item Loader to populate Items.
from scrapy.loader import ItemLoader
loader = ItemLoader(item=ProductItem(), response=response)
loader.add_css('name', 'h1.product-name::text')
#Python #WebScraping #BeautifulSoup #Selenium #Requests
âââââââââââââââ
By: @DataScienceN âĻ
âĪ3
ðĨ Trending Repository: nocobase
ð Description: NocoBase is the most extensible AI-powered no-code/low-code platform for building business applications and enterprise solutions.
ð Repository URL: https://github.com/nocobase/nocobase
ð Website: https://www.nocobase.com
ð Readme: https://github.com/nocobase/nocobase#readme
ð Statistics:
ð Stars: 17.7K stars
ð Watchers: 147
ðī Forks: 2K forks
ðŧ Programming Languages: TypeScript - JavaScript - Smarty - Shell - Dockerfile - Less
ð·ïļ Related Topics:
==================================
ð§ By: https://t.me/DataScienceM
ð Description: NocoBase is the most extensible AI-powered no-code/low-code platform for building business applications and enterprise solutions.
ð Repository URL: https://github.com/nocobase/nocobase
ð Website: https://www.nocobase.com
ð Readme: https://github.com/nocobase/nocobase#readme
ð Statistics:
ð Stars: 17.7K stars
ð Watchers: 147
ðī Forks: 2K forks
ðŧ Programming Languages: TypeScript - JavaScript - Smarty - Shell - Dockerfile - Less
ð·ïļ Related Topics:
#internal_tools #crud #crm #admin_dashboard #self_hosted #web_application #project_management #salesforce #developer_tools #airtable #workflows #low_code #no_code #app_builder #internal_tool #nocode #low_code_development_platform #no_code_platform #low_code_platform #low_code_framework
==================================
ð§ By: https://t.me/DataScienceM