Provides an example of processing a CSV file containing evidence for device detection. The supplied file contains 20,000 examples of evidence representing HTTP headers, for example User-Agent strings that are passed to the pipeline as `header.user-agent` evidence.
We create a device detection pipeline to read the data and output the time taken and information about the detection.
Requesting properties from a single component reduces detection time compared with requesting properties from multiple components. If you don't specify any properties to detect, then all properties are detected.
This example requires a local data file. The free 'Lite' data file can be acquired by
pulling the git submodules under this repository (run `git submodule update --recursive`)
or from the device-detection-data
GitHub repository.
The Lite data file is only used for illustration, and has limited accuracy and capabilities.
Find out about the more capable data files that are available on our
pricing page.
49 import multiprocessing
as mp
66 flowdata = pipeline.create_flowdata()
69 flowdata.evidence.add(
"header.user-agent", user_agent)
79 if flowdata.device.ismobile.has_value():
80 return flowdata.device.ismobile.value()
87 pipeline = DeviceDetectionOnPremisePipelineBuilder(
88 data_file_path=data_file,
90 performance_profile=
'MaxPerformance',
91 add_javascript_builder=
False,
92 restricted_properties=[
"ismobile"],
94 auto_update=
False).
build()
101 for user_agent
in user_agent_list:
106 results[
"unknown"] += 1
108 results[
"mobile"] += 1
110 results[
"notmobile"] += 1
112 output.put(results, list_number)
116 def run(data_file, skip = False):
124 for x
in range(threads):
125 processes.append(mp.Process(target=process_user_agent_list,
126 args=(data_file, split_lists[x], x, output, skip)))
140 results = [output.get()
for p
in processes]
145 return {
"time": total,
"result": results}
148 if __name__ ==
"__main__":
149 ap = argparse.ArgumentParser(description=
'Run detection benchmark.')
150 ap.add_argument(
'-d',
'--data_file', default=
'', help=
'Path to data file')
151 ap.add_argument(
'-u',
'--user_agents_file', default=
'src/fiftyone_devicedetection_onpremise/cxx/device-detection-data/20000 User Agents.csv', help=
'Path to user agents evidence file')
152 ap.add_argument(
'-j',
'--json_output', default=
'', help=
'Output results in JSON format')
153 args = ap.parse_args()
154 if args.data_file ==
"":
155 args.data_file = ExampleUtils.find_file(
"51Degrees-LiteV4.1.hash")
158 with open(args.user_agents_file, newline=
'')
as file:
159 reader = csv.reader(file)
160 user_agents = list(reader)
162 number_of_user_agents = len(user_agents)
164 print(
"Processing " + str(number_of_user_agents) +
" user agents")
168 threads = mp.cpu_count()
170 print(
"Using " + str(threads) +
" threads")
172 chunk_size = int(number_of_user_agents / threads)
175 split_lists = [user_agents[x:x+chunk_size]
176 for x
in range(0, len(user_agents), chunk_size)]
178 calibration =
run(args.data_file, skip=
True)
180 real =
run(args.data_file, skip=
False)
182 real_time = real[
"time"]
184 print(
"Total time (seconds): " + str(real_time) +
" seconds")
185 print(
"Time per user agent (ms): " + str((real_time / number_of_user_agents) * 1000))
187 if args.json_output !=
"":
189 "DetectionsPerSecond": 1.0 / (real_time / number_of_user_agents),
190 "MsPerDetection": real_time * 1000 / number_of_user_agents
192 with open(args.json_output,
"w")
as file:
193 print(json.dumps(results), file = file)
201 for result
in real[
"result"]:
202 final_result[
"unknown"] += result[
"unknown"]
203 final_result[
"mobile"] += result[
"mobile"]
204 final_result[
"notmobile"] += result[
"notmobile"]
206 print(
"Results", final_result)