Jupyter Notebook: memory usage for each notebook

The memory on my lab server (running Ubuntu) constantly fills up because users never shut down their old notebooks. I'd like to get a better idea of how much memory each notebook is taking up. I can summarize the (rough) memory usage of all the Jupyter kernels a given user is running, but I'd like the total memory usage of each individual notebook so that I can shut down those particular memory hogs (or tell the other user to shut theirs down). I quickly put together the following code to get the approximate memory usage per Jupyter kernel, but I don't know how to associate the kernel IDs with a particular notebook.

import os
import pwd

import pandas as pd
import psutil

UID = 1
EUID = 2

pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

df = []
for pid in pids:
    try:
        ret = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
    except IOError:  # proc has already terminated
        continue

    # jupyter notebook processes
    if len(ret) > 0 and 'share/jupyter/runtime' in ret:
        process = psutil.Process(int(pid))
        mem = process.memory_info()[0]

        # user name for pid
        for ln in open('/proc/%d/status' % int(pid)):
            if ln.startswith('Uid:'):
                uid = int(ln.split()[UID])
                uname = pwd.getpwuid(uid).pw_name

        # user, pid, memory, proc_desc
        df.append([uname, pid, mem, ret])

df = pd.DataFrame(df)
df.columns = ['user', 'pid', 'memory', 'proc_desc']
df
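For context, the cmdline captured in proc_desc already contains the kernel ID, embedded in the path of the kernel's connection file (kernel-<id>.json under the runtime directory). A minimal sketch of pulling it out, using a made-up cmdline purely for illustration:

import re

# made-up example of what a notebook kernel's cmdline looks like
cmdline = ('/usr/bin/python -m ipykernel_launcher -f '
           '/home/alice/.local/share/jupyter/runtime/kernel-4f6ab3b2-1234.json')

match = re.search(r'kernel-(.+)\.json', cmdline)
if match:
    print(match.group(1))  # '4f6ab3b2-1234' -- the kernel ID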
2 answers

It seems I figured this out on my own; here is the solution I came up with:

import os
import pwd
import psutil
import re
import string
import json
import urllib2
import pandas as pd

UID = 1
EUID = 2

regex = re.compile(r'.+kernel-(.+)\.json')

pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

# memory info from psutil.Process
df_mem = []
for pid in pids:
    try:
        ret = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
    except IOError:  # proc has already terminated
        continue

    # jupyter notebook processes
    if len(ret) > 0 and 'share/jupyter/runtime' in ret:
        # kernel
        kernel_ID = re.sub(regex, r'\1', ret)
        kernel_ID = filter(lambda x: x in string.printable, kernel_ID)

        # memory
        process = psutil.Process(int(pid))
        mem = process.memory_info()[0] / float(1e9)

        # user name for pid
        for ln in open('/proc/{}/status'.format(int(pid))):
            if ln.startswith('Uid:'):
                uid = int(ln.split()[UID])
                uname = pwd.getpwuid(uid).pw_name

        # user, pid, memory, kernel_ID
        df_mem.append([uname, pid, mem, kernel_ID])

df_mem = pd.DataFrame(df_mem)
df_mem.columns = ['user', 'pid', 'memory_GB', 'kernel_ID']

# notebook info from accessing the ports
df_nb = []
for port in xrange(5000, 30000):
    sessions = None
    try:
        url = 'http://127.0.0.1:{}/api/sessions'.format(port)
        sessions = json.load(urllib2.urlopen(url))
    except urllib2.URLError:
        sessions = None

    if sessions:
        for sess in sessions:
            kernel_ID = str(sess['kernel']['id'])
            notebook_path = sess['notebook']['path']
            df_nb.append([port, kernel_ID, notebook_path])

df_nb = pd.DataFrame(df_nb)
df_nb.columns = ['port', 'kernel_ID', 'notebook_path']

# joining tables
df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
df.sort(['memory_GB'], ascending=False)
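For reference, the merge above only needs two fields per session from the notebook server's /api/sessions endpoint. Here is a minimal sketch of that lookup against a single port (8888 is purely illustrative; the script finds the real ports by scanning):

import json
import urllib2

# 8888 is only an example port; the script above scans 5000-30000 instead
sessions = json.load(urllib2.urlopen('http://127.0.0.1:8888/api/sessions'))
for sess in sessions:
    # each running session ties a kernel ID to the notebook file that owns it
    print('{} {}'.format(sess['kernel']['id'], sess['notebook']['path']))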

I made some improvements to sharchaea's script for portability and speed.

Mainly: it only checks the ports that notebooks are actually running on, handles different hostname options, improves the kernel-process check, and looks for either ipython or jupyter.

import argparse
import re
import subprocess

import pandas as pd
import psutil
import requests
import tabulate

kernel_regex = re.compile(r".+kernel-(.+)\.json")
notebook_regex = re.compile(r"(https?://([^:/]+):?(\d+)?)/?(\?token=([a-z0-9]+))?")


def get_proc_info():
    pids = psutil.pids()

    # memory info from psutil.Process
    df_mem = []

    for pid in pids:
        try:
            proc = psutil.Process(pid)
            cmd = " ".join(proc.cmdline())
        except psutil.NoSuchProcess:
            continue

        if len(cmd) > 0 and ("jupyter" in cmd or "ipython" in cmd) and "kernel" in cmd:
            # kernel
            kernel_ID = re.sub(kernel_regex, r"\1", cmd)

            # memory
            mem = proc.memory_info()[0] / float(1e9)

            uname = proc.username()

            # user, pid, memory, kernel_ID
            df_mem.append([uname, pid, mem, kernel_ID])

    df_mem = pd.DataFrame(df_mem)
    df_mem.columns = ["user", "pid", "memory_GB", "kernel_ID"]
    return df_mem


def get_running_notebooks():
    notebooks = []

    for n in subprocess.Popen(
        ["jupyter", "notebook", "list"], stdout=subprocess.PIPE
    ).stdout.readlines()[1:]:
        match = re.match(notebook_regex, n.decode())
        if match:
            base_url, host, port, _, token = match.groups()
            notebooks.append({"base_url": base_url, "token": token})
        else:
            print("Unknown format: {}".format(n.decode()))

    return notebooks


def get_session_info(password=None):
    df_nb = []
    kernels = []

    for notebook in get_running_notebooks():
        s = requests.Session()
        if notebook["token"] is not None:
            s.get(notebook["base_url"] + "/?token=" + notebook["token"])
        else:
            # do a get to the base url to get the session cookies
            s.get(notebook["base_url"])
        if password is not None:
            # Seems jupyter auth process has changed, need to first get a cookie,
            # then add that cookie to the data being sent over with the password
            data = {"password": password}
            data.update(s.cookies)
            s.post(notebook["base_url"] + "/login", data=data)

        res = s.get(notebook["base_url"] + "/api/sessions")

        if res.status_code != 200:
            raise Exception(res.json())

        for sess in res.json():
            kernel_ID = sess["kernel"]["id"]
            if kernel_ID not in kernels:
                kernel = {
                    "kernel_ID": kernel_ID,
                    "kernel_name": sess["kernel"]["name"],
                    "kernel_state": sess["kernel"]["execution_state"],
                    "kernel_connections": sess["kernel"]["connections"],
                    # "notebook_url": notebook["base_url"] + "/notebook/" + sess["id"],
                    "notebook_path": sess["path"],
                }
                kernel.update(notebook)
                df_nb.append(kernel)
                kernels.append(kernel_ID)

    df_nb = pd.DataFrame(df_nb)
    del df_nb["token"]
    return df_nb


def parse_args():
    parser = argparse.ArgumentParser(description="Find memory usage.")
    parser.add_argument("--password", help="password (only needed if pass-protected)")
    return parser.parse_args()


def main(password=None, print_ascii=False):
    df_mem = get_proc_info()
    df_nb = get_session_info(password)

    # joining tables
    df = pd.merge(df_nb, df_mem, on=["kernel_ID"], how="inner")
    df = df.sort_values("memory_GB", ascending=False).reset_index(drop=True)

    if print_ascii:
        print(tabulate.tabulate(df, headers=(df.columns.tolist())))

    return df


if __name__ == "__main__":
    args = vars(parse_args())
    main(args["password"], print_ascii=True)
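If you save the script as a module instead of running it directly (the file name below is just a placeholder), the resulting table can also be pulled into another script or notebook:

# assumes the script above was saved as notebook_memory.py (placeholder name)
from notebook_memory import main

df = main(password=None)  # pass the server password only if one is set
print(df[['user', 'notebook_path', 'memory_GB']].head(10))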

I'll probably continue to keep this updated here.

Edit: the code has been updated to work with newer versions of Jupyter that use token authentication, to rely only on psutil (making it compatible with Windows), and to run on Python 3.

