# Python Script As A Service

[<img src="https://img.shields.io/static/v1?label=&message=python&color=brightgreen" />](https://github.com/topics/python)
[<img src="https://img.shields.io/static/v1?label=&message=service&color=yellow" />](https://github.com/topics/service)
[<img src="https://img.shields.io/static/v1?label=&message=Web%20Scraping&color=important" />](https://github.com/topics/web-scraping)

- [Setting Up](#setting-up)
- [Create A Systemd Service](#create-a-systemd-service)
- [Create A Windows Service](#create-a-windows-service)
- [Easier Windows Service Using NSSM](#easier-windows-service-using-nssm)
A service (also known as a "daemon") is a process that performs tasks in the background and responds to system events.

Services can be written in any language; these examples use Python because it is one of the most versatile languages out there.

For more information, be sure to read [our blog post on the subject](https://oxylabs.io/blog/python-script-service-guide).
## Setting Up

To run any of the examples, you will need Python 3. We also recommend [using a virtual environment](https://docs.python.org/3/library/venv.html).

```bash
python3 -m venv venv
```
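The example scripts below import `requests` and `beautifulsoup4`, so you will most likely want to install them into the virtual environment as well (Linux/macOS shown; on Windows, run `venv\Scripts\activate` instead):

```bash
# Activate the virtual environment and install the libraries
# imported by the example scripts.
source venv/bin/activate
pip install requests beautifulsoup4
```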

## Create A Systemd Service

First, create a script that scrapes a website. Make sure the script also handles OS signals so that it exits gracefully.

**linux_scrape.py:**
```python
import json
import re
import signal
from pathlib import Path

import requests
from bs4 import BeautifulSoup


class SignalHandler:
    shutdown_requested = False

    def __init__(self):
        # Translate SIGINT/SIGTERM into a shutdown request instead of dying mid-scrape.
        signal.signal(signal.SIGINT, self.request_shutdown)
        signal.signal(signal.SIGTERM, self.request_shutdown)

    def request_shutdown(self, *args):
        print('Request to shutdown received, stopping')
        self.shutdown_requested = True

    def can_run(self):
        return not self.shutdown_requested


signal_handler = SignalHandler()
urls = [
    'https://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html',
    'https://books.toscrape.com/catalogue/shakespeares-sonnets_989/index.html',
    'https://books.toscrape.com/catalogue/sharp-objects_997/index.html',
]

# Cycle through the URLs until a shutdown is requested.
index = 0
while signal_handler.can_run():
    url = urls[index % len(urls)]
    index += 1

    print('Scraping url', url)
    response = requests.get(url)

    soup = BeautifulSoup(response.content, 'html.parser')
    book_name = soup.select_one('.product_main').h1.text
    rows = soup.select('.table.table-striped tr')
    product_info = {row.th.text: row.td.text for row in rows}

    data_folder = Path('./data')
    data_folder.mkdir(parents=True, exist_ok=True)

    # Write the product details to a JSON file named after the book.
    json_file_name = re.sub('[\': ]', '-', book_name)
    json_file_path = data_folder / f'{json_file_name}.json'
    with open(json_file_path, 'w') as book_file:
        json.dump(product_info, book_file)
```
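Before wiring the script into systemd, you can run it directly to confirm that it scrapes and shuts down cleanly (this assumes the virtual environment created in the setup step):

```bash
# Run in the foreground; Ctrl+C sends SIGINT, which the handler turns into a graceful exit.
./venv/bin/python3 linux_scrape.py
```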

Then, create a systemd unit file.

**/etc/systemd/system/book-scraper.service:**
```
[Unit]
Description=A script for scraping the book information
After=syslog.target network.target

[Service]
WorkingDirectory=/home/oxylabs/python-script-service/src/systemd
ExecStart=/home/oxylabs/python-script-service/venv/bin/python3 linux_scrape.py

Restart=always
RestartSec=120

[Install]
WantedBy=multi-user.target
```
Make sure to adjust the paths based on your actual script location.
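With the unit file in place, reload systemd and enable the service so that it starts now and on every boot; the unit name matches the file name used above:

```bash
sudo systemctl daemon-reload
sudo systemctl enable --now book-scraper
# Check that it is running and follow its output:
sudo systemctl status book-scraper
sudo journalctl -u book-scraper -f
```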

A fully working example can be found [here](src/systemd/linux_scrape.py).

## Create A Windows Service

To create a Windows service, you will need to implement methods such as `SvcDoRun` and `SvcStop` and handle events sent by the operating system.

**windows_scrape.py:**
```python
import sys
import servicemanager
import win32event
import win32service
import win32serviceutil
import json
import re
from pathlib import Path

import requests
from bs4 import BeautifulSoup


class BookScraperService(win32serviceutil.ServiceFramework):
    _svc_name_ = 'BookScraperService'
    _svc_display_name_ = 'BookScraperService'
    _svc_description_ = 'Constantly updates the info about books'

    def __init__(self, args):
        win32serviceutil.ServiceFramework.__init__(self, args)
        # Event used to signal that the service should stop.
        self.event = win32event.CreateEvent(None, 0, 0, None)

    def GetAcceptedControls(self):
        # Also accept pre-shutdown notifications from the OS.
        result = win32serviceutil.ServiceFramework.GetAcceptedControls(self)
        result |= win32service.SERVICE_ACCEPT_PRESHUTDOWN
        return result

    def SvcDoRun(self):
        urls = [
            'https://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html',
            'https://books.toscrape.com/catalogue/shakespeares-sonnets_989/index.html',
            'https://books.toscrape.com/catalogue/sharp-objects_997/index.html',
        ]

        index = 0

        while True:
            # Wait up to five seconds for a stop request between scrapes.
            result = win32event.WaitForSingleObject(self.event, 5000)
            if result == win32event.WAIT_OBJECT_0:
                break

            url = urls[index % len(urls)]
            index += 1

            print('Scraping url', url)
            response = requests.get(url)

            soup = BeautifulSoup(response.content, 'html.parser')
            book_name = soup.select_one('.product_main').h1.text
            rows = soup.select('.table.table-striped tr')
            product_info = {row.th.text: row.td.text for row in rows}

            data_folder = Path('C:\\Users\\User\\Scraper\\dist\\scrape\\data')
            data_folder.mkdir(parents=True, exist_ok=True)

            json_file_name = re.sub('[\': ]', '-', book_name)
            json_file_path = data_folder / f'{json_file_name}.json'
            with open(json_file_path, 'w') as book_file:
                json.dump(product_info, book_file)

    def SvcStop(self):
        self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
        # Signal the loop in SvcDoRun to exit.
        win32event.SetEvent(self.event)


if __name__ == '__main__':
    if len(sys.argv) == 1:
        # Started by the Windows Service Control Manager.
        servicemanager.Initialize()
        servicemanager.PrepareToHostSingle(BookScraperService)
        servicemanager.StartServiceCtrlDispatcher()
    else:
        # Started from the command line: install, start, stop, remove, etc.
        win32serviceutil.HandleCommandLine(BookScraperService)
```

Next, install the dependencies and run the pywin32 post-install script.

```
PS C:\> cd C:\Users\User\Scraper
PS C:\Users\User\Scraper> .\venv\Scripts\pip install pypiwin32
PS C:\Users\User\Scraper> .\venv\Scripts\python .\venv\Scripts\pywin32_postinstall.py -install
```
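The build step below also needs `pyinstaller`, and the script itself imports `requests` and `beautifulsoup4`; if they are not in the virtual environment yet, install them the same way:

```
PS C:\Users\User\Scraper> .\venv\Scripts\pip install requests beautifulsoup4 pyinstaller
```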

Bundle your script into an executable.
```
PS C:\Users\User\Scraper> venv\Scripts\pyinstaller --hiddenimport win32timezone -F windows_scrape.py
```

And finally, install and start your newly created service.
```
PS C:\Users\User\Scraper> .\dist\windows_scrape.exe install
Installing service BookScraperService
Changing service configuration
Service updated

PS C:\Users\User\Scraper> .\dist\windows_scrape.exe start
Starting service BookScraperService
```
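The same executable accepts the other `win32serviceutil.HandleCommandLine` commands as well, which helps with troubleshooting and cleanup, for example:

```
PS C:\Users\User\Scraper> .\dist\windows_scrape.exe debug
PS C:\Users\User\Scraper> .\dist\windows_scrape.exe stop
PS C:\Users\User\Scraper> .\dist\windows_scrape.exe remove
```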

A fully working example can be found [here](src/windows-service/windows_scrape.py).

## Easier Windows Service Using NSSM

Instead of dealing with the Windows service layer directly, you can use NSSM (the Non-Sucking Service Manager).

Download NSSM from [the official website](https://nssm.cc/download), extract it to a folder of your choice, and add that folder to your PATH environment variable for convenience.

Once NSSM is installed, simplify your script by removing all of the Windows-specific methods and definitions.

**simple_scrape.py:**
```python
import json
import re
from pathlib import Path

import requests
from bs4 import BeautifulSoup

urls = [
    'https://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html',
    'https://books.toscrape.com/catalogue/shakespeares-sonnets_989/index.html',
    'https://books.toscrape.com/catalogue/sharp-objects_997/index.html',
]

index = 0

# Loop forever; NSSM stops the process when the service is stopped.
while True:
    url = urls[index % len(urls)]
    index += 1

    print('Scraping url', url)
    response = requests.get(url)

    soup = BeautifulSoup(response.content, 'html.parser')
    book_name = soup.select_one('.product_main').h1.text
    rows = soup.select('.table.table-striped tr')
    product_info = {row.th.text: row.td.text for row in rows}

    data_folder = Path('C:\\Users\\User\\Scraper\\data')
    data_folder.mkdir(parents=True, exist_ok=True)

    json_file_name = re.sub('[\': ]', '-', book_name)
    json_file_path = data_folder / f'{json_file_name}.json'
    with open(json_file_path, 'w') as book_file:
        json.dump(product_info, book_file)
```

Bundle your script into an executable.
```
PS C:\Users\User\Scraper> venv\Scripts\pyinstaller -F simple_scrape.py
```

And finally, install and start the script as a service using NSSM.
```
PS C:\> nssm.exe install SimpleScrape C:\Users\User\Scraper\dist\simple_scrape.exe
PS C:\> nssm.exe start SimpleScrape
```
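NSSM can also manage the service after installation, including redirecting the script's output to a log file (the log path below is just an example):

```
PS C:\> nssm.exe set SimpleScrape AppStdout C:\Users\User\Scraper\service.log
PS C:\> nssm.exe status SimpleScrape
PS C:\> nssm.exe stop SimpleScrape
PS C:\> nssm.exe remove SimpleScrape confirm
```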

A fully working script can be found [here](src/nssm/simple_scrape.py).