\r\n

51Degrees Device Detection Python  4.4

Device Detection services for 51Degrees Pipeline

onpremise/performance.py

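Provides an example of processing a CSV file containing evidence for device detection. The supplied file contains 20,000 examples of evidence representing HTTP headers. For example, the first column of each row is a User-Agent string, which is passed to the pipeline as `header.user-agent` evidence.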

We create a device detection pipeline to read the data and output the time taken and information about the detection.

Requesting properties from a single component reduces detection time compared with requesting properties from multiple components. If you don't specify any properties to detect, then all properties are detected.

Please review performance options and hash dataset options for more information about adjusting performance.

This example is available in full on GitHub.

This example requires a local data file. The free 'Lite' data file can be acquired by pulling the git submodules under this repository (run `git submodule update --recursive`) or from the device-detection-data GitHub repository.

The Lite data file is only used for illustration, and has limited accuracy and capabilities. Find out about the more capable data files that are available on our pricing page.

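Required PyPI Dependencies: the package that provides the `fiftyone_devicedetection_onpremise` module imported by the example below.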

1 # *********************************************************************
2 # This Original Work is copyright of 51 Degrees Mobile Experts Limited.
3 # Copyright 2023 51 Degrees Mobile Experts Limited, Davidson House,
4 # Forbury Square, Reading, Berkshire, United Kingdom RG1 3EU.
5 #
6 # This Original Work is licensed under the European Union Public Licence
7 # (EUPL) v.1.2 and is subject to its terms as set out below.
8 #
9 # If a copy of the EUPL was not distributed with this file, You can obtain
10 # one at https://opensource.org/licenses/EUPL-1.2.
11 #
12 # The 'Compatible Licences' set out in the Appendix to the EUPL (as may be
13 # amended by the European Commission) shall be deemed incompatible for
14 # the purposes of the Work and the provisions of the compatibility
15 # clause in Article 5 of the EUPL shall not apply.
16 #
17 # If using the Work as, or as part of, a network application, by
18 # including the attribution notice(s) required under Article 5 of the EUPL
19 # in the end user terms of the application under an appropriate heading,
20 # such notice(s) shall fulfill the requirements of that article.
21 # *********************************************************************
22 
44 
45 import argparse
46 import csv
47 import json
48 import time
49 import multiprocessing as mp
50 
52 # This example goes through a CSV of 20000 user agents and processes them, returning the time and information about the matches
53 from fiftyone_devicedetection_onpremise.devicedetection_onpremise_pipelinebuilder import DeviceDetectionOnPremisePipelineBuilder
54 
55 # Here we make a function that processes a user agent
56 # And returns if it is a mobile device
57 
# Global variables; both are reassigned in the ``__main__`` block before
# run() is called.
# threads: worker count, set from mp.cpu_count() in the main block.
threads = 0
# split_lists: chunks of user-agent CSV rows; run() hands one chunk to each
# worker process it creates.
split_lists = None
61 
62 
def process_user_agent(user_agent):
    """Detect whether a single User-Agent string belongs to a mobile device.

    Relies on the module-level ``pipeline``, which
    ``process_user_agent_list`` assigns before calling this function.

    Args:
        user_agent: The User-Agent header value to use as evidence.

    Returns:
        The value of the ``ismobile`` device property, or ``None`` when the
        property has no meaningful value for this evidence.
    """
    # Build flow data from the shared pipeline, attach the evidence and
    # let the pipeline populate the results.
    data = pipeline.create_flowdata()  # pylint: disable=used-before-assignment
    data.evidence.add("header.user-agent", user_agent)
    data.process()

    # Only read the property when the engine reports a value for it.
    if not data.device.ismobile.has_value():
        return None
    return data.device.ismobile.value()
83 
def process_user_agent_list(data_file, user_agent_list, list_number, output, skip=False):
    """Worker entry point: build a pipeline and classify one chunk of rows.

    Builds a device detection pipeline restricted to the ``ismobile``
    property, stores it in the module-level ``pipeline`` used by
    ``process_user_agent``, tallies the detection result for every row and
    puts the tally on ``output``.

    Args:
        data_file: Path to the device detection data file.
        user_agent_list: CSV rows; the first column of each row is used as
            the User-Agent string.
        list_number: Index of this chunk. Kept for interface compatibility;
            it is not needed to produce the tally.
        output: Queue that receives the tally dict for this chunk.
        skip: When true the pipeline is still built but no row is
            processed, so only the set-up cost is incurred.
    """
    global pipeline
    pipeline = DeviceDetectionOnPremisePipelineBuilder(
        data_file_path=data_file,
        licence_keys="",
        performance_profile='MaxPerformance',
        add_javascript_builder=False,
        restricted_properties=["ismobile"],
        usage_sharing=False,
        auto_update=False).build()

    results = {
        "mobile": 0,
        "notmobile": 0,
        "unknown": 0,
    }

    if not skip:
        for user_agent in user_agent_list:
            result = process_user_agent(user_agent[0])
            # Exactly one bucket per row: None means no meaningful value.
            if result is None:
                results["unknown"] += 1
            elif result:
                results["mobile"] += 1
            else:
                results["notmobile"] += 1

    # The second positional parameter of Queue.put is ``block``; passing
    # list_number there made the put non-blocking for chunk 0, so only the
    # tally is passed.
    output.put(results)
112 
113 # Run the process
def run(data_file, skip=False):
    """Process every chunk in ``split_lists`` in parallel and time it.

    One process is started per chunk of the module-level ``split_lists``;
    each runs ``process_user_agent_list`` and reports its tally through a
    shared queue.

    Args:
        data_file: Path to the device detection data file.
        skip: Forwarded to the workers; when true they build their pipeline
            but process no rows.

    Returns:
        A dict with ``"time"`` (elapsed seconds from starting the first
        process until all have finished) and ``"result"`` (list of the
        per-chunk tally dicts, in arrival order).
    """
    # Queue the workers use to hand their tallies back.
    output = mp.Queue()

    # One process per chunk, so a remainder chunk beyond the CPU count is
    # not silently dropped. An unset split_lists yields no processes.
    processes = [
        mp.Process(target=process_user_agent_list,
                   args=(data_file, chunk, index, output, skip))
        for index, chunk in enumerate(split_lists or [])  # pylint: disable=used-before-assignment
    ]

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments.
    t0 = time.perf_counter()

    for p in processes:
        p.start()

    # Drain the queue BEFORE joining: a child that still has buffered queue
    # data does not terminate until it is consumed, so join-then-get can
    # deadlock.
    results = [output.get() for _ in processes]

    for p in processes:
        p.join()

    total = time.perf_counter() - t0

    return {"time": total, "result": results}
144 
if __name__ == "__main__":
    # Command-line entry point: read the evidence file, split it across one
    # worker per CPU, run the benchmark and report timings and tallies.
    ap = argparse.ArgumentParser(description='Run detection benchmark.')
    ap.add_argument('-d', '--data_file', default='', help='Path to data file')
    ap.add_argument('-u', '--user_agents_file', default='src/fiftyone_devicedetection_onpremise/cxx/device-detection-data/20000 User Agents.csv', help='Path to user agents evidence file')
    ap.add_argument('-j', '--json_output', default='', help='Output results in JSON format')
    args = ap.parse_args()
    if args.data_file == "":
        # NOTE(review): ExampleUtils is not imported in the visible part of
        # this file — confirm the import exists, otherwise this line raises
        # NameError whenever --data_file is omitted.
        args.data_file = ExampleUtils.find_file("51Degrees-LiteV4.1.hash")

    # Read the user agents file as a list of rows. Blank lines are parsed as
    # empty rows; they are dropped because a worker indexing row[0] would
    # crash and never report back, leaving the parent waiting forever.
    with open(args.user_agents_file, newline='') as file:
        user_agents = [row for row in csv.reader(file) if row]

    number_of_user_agents = len(user_agents)

    if number_of_user_agents == 0:
        # Nothing to benchmark; the per-detection figures would divide by zero.
        raise SystemExit("No user agents found in " + args.user_agents_file)

    print("Processing " + str(number_of_user_agents) + " user agents")

    threads = mp.cpu_count()

    print("Using " + str(threads) + " threads")

    # Deal the rows out into exactly `threads` lists. Striding covers every
    # row even when the count is not divisible by the number of workers, and
    # still works when there are fewer rows than workers.
    split_lists = [user_agents[x::threads] for x in range(threads)]

    # Pass that only builds the pipelines (skip=True). Its result is not
    # used below. NOTE(review): presumably intended as a set-up baseline to
    # subtract from the real run — confirm.
    calibration = run(args.data_file, skip=True)

    real = run(args.data_file, skip=False)

    real_time = real["time"]

    print("Total time (seconds): " + str(real_time) + " seconds")
    print("Time per user agent (ms): " + str((real_time / number_of_user_agents) * 1000))

    if args.json_output != "":
        results = {
            "DetectionsPerSecond": 1.0 / (real_time / number_of_user_agents),
            "MsPerDetection": real_time * 1000 / number_of_user_agents
        }
        with open(args.json_output, "w") as file:
            print(json.dumps(results), file=file)

    # Sum the per-worker tallies into one overall result.
    final_result = {
        "mobile": 0,
        "notmobile": 0,
        "unknown": 0
    }

    for result in real["result"]:
        for key in ("unknown", "mobile", "notmobile"):
            final_result[key] += result[key]

    print("Results", final_result)