diff --git a/samples/samples_GPL/built-in/fire_detection/CMakeLists.txt b/samples/samples_GPL/built-in/fire_detection/CMakeLists.txt new file mode 100755 index 0000000000000000000000000000000000000000..b5d02103e09e523965aa0ebb1436481138b36a28 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright (c) ModelZoo. 2025-2025. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_DETECTION_YOLO11S) + +add_subdirectory("./src") \ No newline at end of file diff --git a/samples/samples_GPL/built-in/fire_detection/LICENSE b/samples/samples_GPL/built-in/fire_detection/LICENSE new file mode 100755 index 0000000000000000000000000000000000000000..92b370f0e0e1b91cf8baf5d0f78c56a9824c39f1 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/LICENSE @@ -0,0 +1,674 @@ +GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/samples/samples_GPL/built-in/fire_detection/README.md b/samples/samples_GPL/built-in/fire_detection/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31c93c65ca4d96597aaef71da899d91c94770ad3 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/README.md @@ -0,0 +1,313 @@ +# 基于 fire_detection 网络实现目标检测 + +## 目录 +- [1. 概述](#1-概述) + - [输入输出数据](#输入输出数据) + - [目录结构](#目录结构) +- [2. 快速开始](#2-快速开始) +- [3. 模型训练与优化](#3-模型训练与优化) + - [训练环境准备](#训练环境准备) + - [训练数据集准备](#训练数据集准备) + - [模型训练](#模型训练) + - [模型剪枝](#模型剪枝) + - [模型量化](#模型量化) +- [4. 模型转换 (ONNX & OM)](#4-模型转换-onnx--om) +- [5. 模型推理验证](#5-模型推理验证) + - [推理环境准备](#推理环境准备) + - [准备推理数据集](#准备推理数据集) + - [编译运行与后处理](#编译运行与后处理) +- [6. 模型推理性能与精度](#6-模型推理性能与精度) + +--- + +## 1. 概述 + +火焰检测网络模型是基于 YOLO11n 网络开发的适配端侧芯片的检测网络,在继承了原有 YOLO 网络模型优点的基础上,增加部分 attention 机制帮助火焰特征提取。使用剪枝以及量化的小型化操作,在保持精度情况下,大大降低网络的资源消耗与单帧推理延时。 + +- 参考 YOLO11 实现: + ``` + https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/11/yolo11.yaml + ``` + +### 输入输出数据 + +- **输入数据** + + | 输入数据 | 数据类型 | 大小 | 数据排布格式 | + | -------- | -------- | ---------------- | ------------ | + | images | NV21 | 1 x 3 x 384 x 640 | YVU420SP | + +- **输出数据** + + | 输出数据 | 数据类型 | 大小 | + | -------- | -------- | ----------- | + | output0 | FP32 | 5 x 5040 | + +### 目录结构 + +样例代码结构如下所示: + +```text +├── data +│ ├── ... // 测试数据 +├── script +│ ├── drawRectangle.py // 画框验证脚本 +│ ├── accuracy.py // 精度评测脚本 +│ ├── pth2onnx.py // 模型导出脚本 +├── src +│ ├── acl.json // 系统初始化的配置文件 +│ ├── CMakeLists.txt // src下编译脚本 +│ ├── main.cpp // 板端推理 sample 的实现文件 +├── train +│ ├── fire_data.yaml // 数据集配置文件 +│ ├── fire_model.yaml // 模型配置文件 +│ ├── fire_train.yaml // 训练配置文件 +│ ├── prune.ipynb // 剪枝 notebook +│ ├── yolo.patch // yolo补丁文件 +├── model +│ ├── fire_detectionV1.om // 火焰检测模型文件(需自行下载或转换产生) +├── CMakeLists.txt // 外层编译脚本 +├── *.json // 模型其他信息与配置文件 +├── LICENSE // 许可文件 +``` + +--- + +## 2.
快速开始 + +> **注**:本章节包含无需自己训练和转换,直接使用现有 OM 模型并在板端运行推理的完整流程。 + +**步骤一:获取模型** + +提供转化成功的 `om` 模型文件,可以从[网站](https://modelzoo.hispark.hisilicon.com/#/ModelZoo)上进行下载。 + +创建 `model` 文件夹,并将下载好的 om 模型文件移动到 `./model` 目录下。 +```bash +mkdir -p model +``` +*(注:若需要体验从 pth 到 om 模型的转化过程,请参考后文的[模型转换](#4-模型转换-onnx--om)章节。)* + +**步骤二:编译代码 (在PC交叉编译环境执行)** + +1. **环境准备**:在 PC 端安装 CANN 包,依赖 SDK 头文件和动态库在 CANN 包安装目录下,以 Hi3516CV610 为例(假设 CANN 包安装路径为 `$HOME/Ascend/`),配置环境变量: + ```bash + export NPU_INCLUDE_PATH=$HOME/Ascend/ascend-toolkit/svp_latest/acllib/include/acl + export NPU_LIB_PATH=$HOME/Ascend/ascend-toolkit/svp_latest/acllib/lib32/stub + ``` + 在 PC 端安装交叉编译工具链(在版本包中找到 `gcc-20250305-arm-v01c02-linux-musleabi`): + ```bash + cd gcc-20250305-arm-v01c02-linux-musleabi + ./install_gcc_toolchain.sh + export PATH=/opt/linux/x86-arm/arm-v01c02-linux-musleabi/bin:$PATH + ``` + +2. 切换到 `fire_detection` 目录,创建目录用于存放编译生成的文件: + ```bash + mkdir build + cd build + ``` + +3. 执行 cmake 命令生成编译文件: + ```bash + cmake ../src -DCMAKE_TOOLCHAIN_FILE=../../../common/cmake/toolchain_aarch64_610_linux.cmake -DSOC_VERSION=Hi3516CV610 + ``` + +4. 执行 make 命令生成可执行文件,可执行文件 `main` 将生成于 `./out` 目录下: + ```bash + make + ``` + +**步骤三:运行应用 (在板端环境中执行)** + +1. 将整个 `modelzoo` 代码目录上传到板端运行环境。(或者使用mount命令挂载PC的modelzoo目录到板端) +2. 以运行用户登录板端运行环境。 +3. 切换到可执行文件 `main` 所在的目录并赋予执行权限: + ```bash + chmod +x main + ``` +4. 配置可执行文件依赖动态库的搜索路径: + ```bash + export LD_LIBRARY_PATH=$HOME/modelzoo/samples/samples_GPL/opensource/opencv/lib/aarch64_610_linux:$LD_LIBRARY_PATH + ``` +5. 运行可执行文件进行测试推理: + ```bash + ./main --model ../model/fire_detectionV1.om --input ../data/file_list.json + ``` + *参数说明*: + - `--model`:om 模型路径 + - `--input`:输入数据列表配置文件路径。(通过修改该文件中的 `loop` 变量可以控制循环次数。`loop` 为 1 包含加载耗时,建议设为 100 以求平均性能)。 + + **结果查看**: + 推理结果(bin态)会保存在 `out/result/bin` 目录下,后处理 bbox 结果保存在 `result/txt` 目录下。 + 板端输出性能示例 (SVP_NNN平台):`[INFO] time: 20.38ms, fps: 49.06` + +--- + +## 3. 模型训练与优化 + +### 训练环境准备 +1.
安装 YOLO 基础环境与依赖: + ```bash + git clone https://github.com/ultralytics/ultralytics + cd ultralytics + git checkout 94fac3903612fb03cab007734a8d1ce86de5376e + + # Python >= 3.8(建议 python3.9.0) + pip3 install -e . + ``` + *(注:该命令会自动安装所需的其他依赖库)* + +2. 打入本仓库下针对 fire detection 的定制 Patch: + ```bash + # 命令执行在ultralytics目录下 + cp ~/workspace/modelzoo/samples/samples_GPL/built-in/fire_detection/train/yolo.patch . + git apply yolo.patch + ``` + +### 训练数据集准备 +训练需要正负样本,以下为本模型相关的数据集参考(仅保留火焰相关标注): +1. **fasdd 数据集** (正负样本): [链接](https://www.scidb.cn/en/detail?dataSetId=ce9c9400b44148e1b0a749f5c3eb0bda) +2. **DFS 数据集** (正样本): [链接](https://github.com/siyuanwu/DFS-FIRE-SMOKE-Dataset) +3. **D-fire 数据集** (正负样本): [链接](https://github.com/gaia-solutions-on-demand/DFireDataset) +4. **S2TLD 数据集** (负样本): [链接](https://github.com/Thinklab-SJTU/S2TLD) +5. **Lamp_detection** (负样本): [链接](https://universe.roboflow.com/michael-shearer/lamp-detector) + +### 模型训练 +相关配置文件存在于 `train` 目录下: +- **模型结构**:`train/fire_model.yaml` +- **数据配置**:`train/fire_data.yaml` +- **训练配置**:`train/fire_train.yaml` + +**开启训练**: +```bash +cd train +yolo cfg=fire_train.yaml +``` + +### 模型剪枝 +当前 `fire_model.yaml` 已经是大模型剪枝后得到的小型化结构。若需要体验或调整大模型剪枝小模型的流程,可运行配套的 `prune.ipynb`。完整的剪枝步骤包含: +1. 可剪枝模块依赖性分析 +2. 剪枝敏感度分析 +3. 确定各个模块的剪枝比例 +4. 执行全局剪枝 +5. 
剪枝后的 Fine-tune (微调) 恢复精度 + +### 模型量化 +利用 PTQ(训练后量化)以及 ATC 工具,可选择不同的量化模式实现性能提升。若全局量化精度下降严重,可利用 `mindcmd` 伪量化分析工具,挑选出相似度较低(建议阈值 < 0.95)的层保留在 FP16 计算,以此开启混精度。 + +针对火焰检测大模型的高精度敏感层配置示例: +```text +--hight_precision_later="/model.10/m/m.0/attn/MatMul;/model.10/m/m.0/attn/Softmax;/model.10/m/m.0/attn/MatMul_1;/model.10/m/m.0/attn/Reshape_1;/model.10/m/m.0/attn/Add;/model.10/m/m.0/Add;/model.10/m/m.0/ffn/ffn.0/conv/Conv;/model.10/m/m.0/Add_1;/model.26/cv2.2/cv2.2.2.0/conv/Conv;/model.26/cv2.2/cv2.2.1/conv/Conv;/model.26/cv2.2/cv2.2.2/Conv;/model.26/Concat_2;/model.26/Reshape_2;/model.26/Concat_3;/model.26/Split;/model.26/Sigmoid;/model.26/dfl/Reshape;/model.26/dfl/Transpose;/model.26/dfl/Softmax;/model.26/dfl/conv/Conv;/model.26/dfl/Reshape_1;/model.26/Slice;/model.26/Slice_1;/model.26/Sub_1;/model.26/Mul_2;/model.26/Concat_5" +``` + +--- + +## 4. 模型转换 (ONNX & OM) + +此流程介绍如何将自行训练的 `*.pt` 模型转换为可于板端执行的 `*.om` 模型。 + +1. **获取待转化权重文件** + 假设已准备好 `fire_detectionV1.pt`,将其置于 `model` 文件夹内: + ```bash + mkdir -p model + # 移动或下载模型到 ./model/fire_detectionV1.pt + ``` + +2. **导出 ONNX 模型** + 使用提供的代码脚本导出,或通过 ultralytics 原生支持导出: + ```bash + cd script + python pth2onnx.py + cd ../ + ``` + *(或者由 yolo 直接执行:`yolo export model="fire_detectionV1.pt" format=onnx opset=13 project=export name="fire_detectionV1.onnx"`)* + +3. 
**ATC 工具转 OM 模型** + - **对于 Hi3516CV610 (SVP_NNN)**: + ```bash + atc --framework=5 --model="fire.onnx" --input_shape="images:1,3,384,640" --insert_op_conf="../model_cfg/HI3516CV610/insert_op.conf" --output="fire_detectionV1" --images_list="../data/image_ref_list.txt" --soc_version=Hi3516CV610 --compile_mode=0 --hight_precision_later="/model.10/m/m.0/attn/MatMul;/model.10/m/m.0/attn/Softmax;/model.10/m/m.0/attn/MatMul_1;/model.10/m/m.0/attn/Reshape_1;/model.10/m/m.0/attn/Add;/model.10/m/m.0/Add;/model.10/m/m.0/ffn/ffn.0/conv/Conv;/model.10/m/m.0/Add_1;/model.26/cv2.2/cv2.2.2.0/conv/Conv;/model.26/cv2.2/cv2.2.1/conv/Conv;/model.26/cv2.2/cv2.2.2/Conv;/model.26/Concat_2;/model.26/Reshape_2;/model.26/Concat_3;/model.26/Split;/model.26/Sigmoid;/model.26/dfl/Reshape;/model.26/dfl/Transpose;/model.26/dfl/Softmax;/model.26/dfl/conv/Conv;/model.26/dfl/Reshape_1;/model.26/Slice;/model.26/Slice_1;/model.26/Sub_1;/model.26/Mul_2;/model.26/Concat_5" --online_model_type=0 + ``` + *(关于ATC参数详情请查阅对应环境的手册资料;编译模式 0 为推荐量化等级以达到最佳的端侧性能)* + *(image_ref_list中为量化校准图片,可以从数据集中挑选10-20张典型场景图片作为量化校准图片)* + +--- + +## 5. 模型推理验证 + +如果您跳过了“快速开始”或者更换了板端/SOC类型,可以通过以下步骤使用评测数据集走完推理全流程。 + +### 推理环境准备 + +验证芯片名称以选用正确的包版本: +```bash +cat /proc/umap/sys +# 预期回显示例形如: [SYS] Version: Hi3516CV610 +``` + +**版本配套表** +| 芯片型号 | 算力引擎 | CANN包版本 | 编译工具链 | SDK版本 | +| ----------- | ---------- | ------------------- | ------------------------------ | ------------------------- | +| Hi3516CV610 | SVP_NNN | SVP_NN_PC_V5.0.2.3 | arm-v01c02-linux-musleabi-gcc | Hi3516CV610R001C01SP020 | + +### 准备推理数据集 + +1. **获取原始测试数据集**: + 下载 **fasdd 数据集** : [链接](https://www.scidb.cn/en/detail?dataSetId=ce9c9400b44148e1b0a749f5c3eb0bda),并在当前源码根目录下创建 `fasdd` 文件夹: + ``` + fasdd + ├── train + │ ├── images + │ └── labels + └── val + | ├── images + | └── labels + └── test + ├── images + └── labels + ``` + +2. **数据集目录声明 (NNN 必须)**: + ```bash + python3 ../../../../utils/generate_file_list.py fasdd/test + ``` + +### 编译运行与后处理 + +1. 
**PC端编译**: + 按您的实际运行操作系统和 SOC_VERSION 选择对应的 toolchain 执行 CMake 编译: + ```bash + mkdir -p build && cd build + + cmake ../src -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=../../../../common/cmake/toolchain_aarch64_610_linux.cmake -DSOC_VERSION=Hi3516CV610 + + make + ``` + +2. **板端运行**: + 登入板端,确保含有二进制执行权限。根据平台执行: + + - **Hi3516CV610 SVP_NNN** + ```bash + ./main ../../model/fire_detectionV1.om ../../fasdd/test/file_list.txt + ``` + +3. **精度与可视化验证**: + 推理完毕之后,计算 mAP(`--labels_dir` 为 YOLO 格式 txt 真值标注目录,脚本会自动将其转换为 COCO 格式后再评估;`--bin_dir` 请按实际推理输出目录填写): + ```bash + # SVP_NNN 平台 + python script/accuracy.py --bin_dir out/result/bin --img_dir fasdd/test/images --labels_dir fasdd/test/labels --output_json dt_result.json --gt_json_out gt_annotations.json + ``` + 利用脚本可视化画出边界框(仅 SVP_NNN)。该脚本为交互式,运行后按提示依次输入图片路径(如 `xx.jpg`)与结果 txt 路径(如 `result/txt/xx_result.txt`): + ```bash + python script/drawRectangle.py + ``` + +--- + +## 6. 模型推理性能与精度 + +下方表格展示了在不同芯片引擎上的 `fire_detection` 参考打底指标,您可以根据此表验证环境与配置是否正确。 + +| 芯片型号 | Batch Size | 测试数据集 | AP(IoU=0.50) | AP(IoU=0.50:0.95) | 性能(fps) | +| ------------------- | ---------- | ---------- | -------------- | ------------------- | ----------- | +| Hi3516CV610 SVP_NNN | 1 | fasdd | 81.4% | 53.0% | 49.06 | diff --git a/samples/samples_GPL/built-in/fire_detection/fire_detection.json b/samples/samples_GPL/built-in/fire_detection/fire_detection.json new file mode 100644 index 0000000000000000000000000000000000000000..4ac95cd260cde81a074d4df024a0e8e1c1755660 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/fire_detection.json @@ -0,0 +1,95 @@ +{ + "modelName": "fire_detection", + "modelDesc": "火焰检测网络模型是基于 YOLO11n 网络开发的适配端侧芯片的检测网络,在继承了原有 YOLO 网络模型优点的基础上,增加部分 attention 机制帮助火焰特征提取。使用剪枝以及量化的小型化操作,在保持精度情况下,大大降低网络的资源消耗与单帧推理延时。", + "modelRepository": "https://gitee.com/Hispark/modelzoo/tree/master/samples/samples_GPL/built-in/fire_detection", + "modelParameter": { + "输入": "384x640", + "参数量": "0.817M", + "计算量": "3.3GFLOPs" + }, + "isBeta": false, + "betaDesc": "", + "modelFeedback": "https://developers.hisilicon.com/forum/0155201230363076006", +
"modelUsageScenes": [ + { + "task": "Vision", + "tags": [ + "检测" + ] + } + ], + "modelFrame": [ + "PyTorch" + ], + "modelDatasets": { + "desc": "fasdd数据集", + "link": "https://www.scidb.cn/en/detail?dataSetId=ce9c9400b44148e1b0a749f5c3eb0bda" + }, + "srcModelLicense": [ + { + "desc": "源模型", + "link": "https://github.com/ultralytics/ultralytics/blob/master/LICENSE" + } + ], + "modelLicense": [ + { + "desc": "部署模型", + "link": "https://gitee.com/HiSpark/modelzoo/blob/master/samples/samples_GPL/built-in/fire_detection/LICENSE" + } + ], + "quickStart": { + "md": "https://gitee.com/HiSpark/modelzoo/tree/master/samples/samples_GPL/built-in/fire_detection/doc/快速开始.md" + }, + "modelChipset": [ + { + "chipset": "Hi3516CV610", + "tools": [ + { + "name": "CANN工具", + "link": "", + "desc": "SVP_NN_PC_V5.0.2.3 (请联系FAE获取)" + }, + { + "name": "编译工具链", + "link": "", + "desc": "gcc-20250305-arm-v01c02-linux-musleabi (请联系FAE获取)" + }, + { + "name": "SDK", + "link": "", + "desc": "Hi3516CV610R001C01SP020 (请联系FAE获取)" + } + ], + "os": [ + "Linux" + ], + "performance": [ + { + "quantMode": "a8w8", + "detail": [ + { + "performance": 20.38, + "performanceUnit": "耗时(ms)", + "performanceDesc": "" + }, + { + "performance": 49.06, + "performanceUnit": "性能(fps)", + "performanceDesc": "" + }, + { + "performance": 20.79, + "performanceUnit": "单帧内存带宽(MB)", + "performanceDesc": "" + }, + { + "performance": 6.30, + "performanceUnit": "内存(MB)", + "performanceDesc": "" + } + ] + } + ] + } + ] +} diff --git a/samples/samples_GPL/built-in/fire_detection/script/accuracy.py b/samples/samples_GPL/built-in/fire_detection/script/accuracy.py new file mode 100755 index 0000000000000000000000000000000000000000..0163bcf59201ca0bfceb10056e8b471d19560c4f --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/script/accuracy.py @@ -0,0 +1,312 @@ +# Copyright (c) ModelZoo. 2025-2026. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import json +import glob +import struct +import numpy as np +import torch +import argparse +from PIL import Image +from torchvision.ops import batched_nms +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +def parse_yolo_labels_to_coco(img_dir, labels_dir, output_gt_json, name_to_id): + """ + 遍历labels_dir下的所有txt文件,将其YOLO相对坐标转换为COCO绝对坐标, + 并保存为COCO evaluation所需的JSON格式。 + """ + dataset = { + "images": [], + "annotations": [], + "categories": [{"id": 0, "name": "fire"}] + } + + ann_id = 1 + # 查找所有图片 + img_files = glob.glob(os.path.join(img_dir, "*.*")) + img_exts = {'.jpg', '.png', '.jpeg'} + img_files = [f for f in img_files if os.path.splitext(f)[1].lower() in img_exts] + + print(f"在 {img_dir} 找到 {len(img_files)} 张图片用于构建 Ground Truth。") + + for img_path in img_files: + base_name = os.path.splitext(os.path.basename(img_path))[0] + img_id = name_to_id[base_name] + + try: + with Image.open(img_path) as img: + img_width, img_height = img.size + except Exception as e: + print(f"无法打开GT图片 {img_path}: {e}") + continue + + dataset["images"].append({ + "id": img_id, + "width": img_width, + "height": img_height, + "file_name": os.path.basename(img_path) + }) + + label_path = os.path.join(labels_dir, f"{base_name}.txt") + if os.path.exists(label_path): + with open(label_path, 'r') as f: + lines = f.readlines() + for line in lines: + parts = line.strip().split() + if len(parts) >= 5: + cls_id 
= int(parts[0]) + # 火焰检测仅有火焰类,强制统一归一化为类 0 + if cls_id != 0: + cls_id = 0 + + x_center_rel = float(parts[1]) + y_center_rel = float(parts[2]) + w_rel = float(parts[3]) + h_rel = float(parts[4]) + + w = w_rel * img_width + h = h_rel * img_height + x_min = (x_center_rel * img_width) - (w / 2) + y_min = (y_center_rel * img_height) - (h / 2) + + dataset["annotations"].append({ + "id": ann_id, + "image_id": img_id, + "category_id": 0, + "bbox": [round(x_min, 2), round(y_min, 2), round(w, 2), round(h, 2)], + "area": round(w * h, 2), + "iscrowd": 0 + }) + ann_id += 1 + + with open(output_gt_json, 'w') as f: + json.dump(dataset, f) + print(f"GT 转换完成,Ground Truth JSON 已包含 {len(dataset['images'])} 张图片, {len(dataset['annotations'])} 个真实标注框,存至: {output_gt_json}") + return output_gt_json + + +def parse_yolo_bin_files(bin_dir, img_dir, output_file, name_to_id, nms_threshold=0.6, conf_threshold=0.001, target_size=(640, 640)): + """ + 解析bin文件,使用torchvision的batched_nms按类别执行NMS,保存结果 + """ + image_results = {} + bin_files = glob.glob(os.path.join(bin_dir, "*.bin")) + print(f"找到 {len(bin_files)} 个 bin 预测文件...") + + for bin_path in bin_files: + base_name = os.path.splitext(os.path.basename(bin_path))[0] + file_name = base_name.replace("_result", "") if "_result" in base_name else base_name + + # 通过在映射中取id确保文件名与预测能稳定对应(即使非数字文件也能支持) + if file_name not in name_to_id: + continue + img_id = name_to_id[file_name] + + # 寻找对应的图片 + # 兼容 .jpg/.png + img_path = os.path.join(img_dir, f"{file_name}.jpg") + if not os.path.exists(img_path): + img_path = os.path.join(img_dir, f"{file_name}.png") + + if not os.path.exists(img_path): + print(f"警告: 预测结果找不着对应的原图 {file_name},跳过该预测") + continue + + # 获取图片宽高 + try: + with Image.open(img_path) as img: + img_width, img_height = img.size + except Exception as e: + print(f"警告: 无法打开图片 {img_path},错误: {e},跳过该预测") + continue + + # 解析bin文件并收集所有框信息 + try: + with open(bin_path, 'rb') as f: + data = f.read() + total_floats = len(data) // 4 + rows = total_floats // 84 + + 
if rows != 8400: + print(f"警告: {bin_path} 包含 {rows} 行,预期8400行,跳过") + continue + + # 存储当前图像的所有框(筛选后) + all_bboxes = [] # 存储[x_min, y_min, x_max, y_max]格式 + all_scores = [] + all_categories = [] + all_infos = [] + + for row in range(rows): + start_idx = row * 84 * 4 + end_idx = start_idx + 84 * 4 + row_data = data[start_idx:end_idx] + + floats = struct.unpack('84f', row_data) + + x_center_rel, y_center_rel, w_rel, h_rel = floats[:4] + class_scores = floats[4:] + class_id = class_scores.index(max(class_scores)) + confidence = max(class_scores) + + # 🔥 火焰检测这里仅计算 class_id == 0 这一类框 + if class_id != 0: + continue + + # 过滤低置信度框 + if confidence > conf_threshold: + # 坐标反变换映射,结合推理图的 padding 和 scale + scale = min(target_size[0] / img_width, target_size[1] / img_height) + new_w, new_h = int(img_width * scale), int(img_height * scale) + pad_w, pad_h = (target_size[0] - new_w) // 2, (target_size[1] - new_h) // 2 + + x_center = (x_center_rel - pad_w) / scale + y_center = (y_center_rel - pad_h) / scale + width = w_rel / scale + height = h_rel / scale + + x_min = x_center - width / 2 + y_min = y_center - height / 2 + x_max = x_center + width / 2 + y_max = y_center + height / 2 + + # 边界裁剪防溢出 + x_min = np.clip(x_min, 0, img_width) + y_min = np.clip(y_min, 0, img_height) + x_max = np.clip(x_max, 0, img_width) + y_max = np.clip(y_max, 0, img_height) + + category_id = 0 # 强制只包含单类类别 0 的火焰 + + # 存储NMS所需格式(xyxy)和额外信息 + all_bboxes.append([x_min, y_min, x_max, y_max]) + all_scores.append(confidence) + all_categories.append(category_id) + all_infos.append({ + "image_id": img_id, + "category_id": category_id, + "score": confidence, + "coco_bbox": [x_min, y_min, x_max - x_min, y_max - y_min] # 预存COCO格式bbox + }) + + # 使用 torchvision 执行 NMS + if all_bboxes: + # 转换为PyTorch张量 + bboxes_tensor = torch.tensor(all_bboxes, dtype=torch.float32) + scores_tensor = torch.tensor(all_scores, dtype=torch.float32) + categories_tensor = torch.tensor(all_categories, dtype=torch.int64) + + # 按类别执行NMS + 
keep_indices = batched_nms( + boxes=bboxes_tensor, + scores=scores_tensor, + idxs=categories_tensor, # 按类别ID分组 + iou_threshold=nms_threshold + ).numpy() # 转换为numpy索引 + + # 记录最终幸存框信息 + for idx in keep_indices: + info = all_infos[idx] + image_results.setdefault(img_id, []).append({ + "image_id": info["image_id"], + "category_id": info["category_id"], + "bbox": [float(v) for v in info["coco_bbox"]], + "score": float(info["score"]) + }) + + except Exception as e: + print(f"处理预测文件 {bin_path} 时出错: {e}") + continue + + # 汇总写入输出 + final_results = [] + for img_id in image_results: + final_results.extend(image_results[img_id]) + + with open(output_file, 'w') as f: + json.dump(final_results, f) + + print(f"预测数据处理完成,经过 NMS 过滤后留存 {len(final_results)} 个预测框,存至: {output_file}") + return output_file + + +def evaluate_coco(gt_annotations, dt_results): + """使用COCO API评估单类火焰检测结果""" + coco_gt = COCO(gt_annotations) + if not dt_results or not os.path.exists(dt_results) or os.path.getsize(dt_results) <= 2: + print("未生成任何有效预测框,无法进行指标评估!") + return + + coco_dt = coco_gt.loadRes(dt_results) + coco_eval = COCOeval(coco_gt, coco_dt, 'bbox') + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + +def main(): + parser = argparse.ArgumentParser(description='解析火焰检测YOLO的bin预测结果,读取YOLO格式GT,一键完成COCO基准测试') + + parser.add_argument('--bin_dir', required=True, help='存放YOLO输出bin文件的预测结果目录') + parser.add_argument('--img_dir', required=True, help='存放原始测试图片的目录 (例如 fasdd/test/images)') + parser.add_argument('--labels_dir', required=True, help='存放YOLO格式 txt真值标注的目录 (例如 fasdd/test/labels)') + + parser.add_argument('--output_json', default='dt_result.json', help='预测结果临时COCO表示JSON的保存路径') + parser.add_argument('--gt_json_out', default='gt_annotations.json', help='真值结果转化为了COCO格式后的临时JSON存储路径') + + parser.add_argument('--nms_threshold', type=float, default=0.6, help='NMS的IOU阈值,默认0.6') + parser.add_argument('--conf_threshold', type=float, default=0.001, help='置信度过滤阈值,默认0.001') + 
parser.add_argument('--target_size', type=int, nargs=2, default=[640, 384], help='模型输入的宽度和高度 (W H),例如"640 384"') # consumed as (width, height); the model input is 1x3x384x640 (NCHW), i.e. W=640, H=384 + + args = parser.parse_args() + + for dir_path in [args.bin_dir, args.img_dir, args.labels_dir]: + if not os.path.isdir(dir_path): + print(f"严重错误: 检查到目录 {dir_path} 不存在,停止运行。") + exit(1) + + # Sort all images by name and assign fixed positive integer image ids, so non-numeric file names map consistently for both GT and predictions + img_files = glob.glob(os.path.join(args.img_dir, "*.*")) + img_exts = {'.jpg', '.png', '.jpeg'} + img_files = [f for f in img_files if os.path.splitext(f)[1].lower() in img_exts] + img_files.sort() + + name_to_id = {} + for idx, f in enumerate(img_files): + base_name = os.path.splitext(os.path.basename(f))[0] + name_to_id[base_name] = idx + 1 # ids start at 1 so every image id is a positive integer + + print("------ 第一步:解析 Ground Truth (YOLO转COCO) ------") + gt_file = parse_yolo_labels_to_coco(args.img_dir, args.labels_dir, args.gt_json_out, name_to_id) + + print("\n------ 第二步:解析 Predictions (bin推盘反解与NMS) ------") + dt_file = parse_yolo_bin_files( + bin_dir=args.bin_dir, + img_dir=args.img_dir, + output_file=args.output_json, + name_to_id=name_to_id, + nms_threshold=args.nms_threshold, + conf_threshold=args.conf_threshold, + target_size=tuple(args.target_size) + ) + + print("\n================ 第三步:正式执行 COCO AP/AR 精度评估 ================\n") + evaluate_coco(gt_file, dt_file) + +if __name__ == "__main__": + main() diff --git a/samples/samples_GPL/built-in/fire_detection/script/drawRectangle.py b/samples/samples_GPL/built-in/fire_detection/script/drawRectangle.py new file mode 100755 index 0000000000000000000000000000000000000000..1d15f3f726d8f763470dd457cfb31880cd148bf4 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/script/drawRectangle.py @@ -0,0 +1,79 @@ +import cv2 +import re +import os +import numpy as np + +def get_color(class_id): + """根据类ID生成固定的随机颜色""" + # 使用随机种子确保同一个ID每次颜色都一样 + np.random.seed(class_id) + return tuple(np.random.randint(0, 255, size=3).tolist()) + +def draw_from_files(image_path, txt_path, conf_threshold=0.01): + # 加载图片 + img
= cv2.imread(image_path) + if img is None: + print(f"错误: 无法加载图片 {image_path}") + return + + # 读取 txt 文件内容 + if not os.path.exists(txt_path): + print(f"错误: 找不到文件 {txt_path}") + return + + with open(txt_path, 'r', encoding='utf-8') as f: + result_text = f.read() + + # 2. 正则表达式解析文本 + pattern = r"Class (\d+) \| Score: ([\d.]+) \| Box: \[([\d.]+), ([\d.]+), ([\d.]+), ([\d.]+)\]" + matches = re.findall(pattern, result_text) + + count = 0 + for match in matches: + cls_id = int(match[0]) + score = float(match[1]) + + # 置信度过滤 + if score < conf_threshold: + continue + + # 坐标解析 + x1, y1, x2, y2 = map(int, [float(x) for x in match[2:]]) + + # 获取该类别的固定颜色 + color = get_color(cls_id) + + label_name = "fire" + display_txt = f"{label_name} {score:.2f}" + + # 3. 绘图 + # 画矩形框 + cv2.rectangle(img, (x1, y1), (x2, y2), color, 2) + + # 绘制标签背景(颜色与框一致) + (w, h), baseline = cv2.getTextSize(display_txt, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + cv2.rectangle(img, (x1, y1 - h - 10), (x1 + w, y1), color, -1) + + # 写文字(白色或黑色,取决于背景深浅,这里统一用白色) + cv2.putText(img, display_txt, (x1, y1 - 5), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA) + count += 1 + + # 4. 
展示与保存 + save_name = "res_" + os.path.basename(image_path) + cv2.imwrite(save_name, img) + print(f"检测完成!共绘制了 {count} 个目标") + print(f"结果已保存至: {save_name}") + + cv2.namedWindow("Result", cv2.WINDOW_NORMAL) + cv2.imshow("Result", img) + cv2.waitKey(0) + cv2.destroyAllWindows() + +if __name__ == "__main__": + print("--- 目标检测结果可视化工具 (多颜色版) ---") + img_in = input("请输入图片路径: ").strip().replace('"', '').replace("'", "") + txt_in = input("请输入结果txt路径: ").strip().replace('"', '').replace("'", "") + + # 建议阈值设为 0.01 来看你的 Class 0 结果 + draw_from_files(img_in, txt_in, conf_threshold=0.01) diff --git a/samples/samples_GPL/built-in/fire_detection/script/pth2onnx.py b/samples/samples_GPL/built-in/fire_detection/script/pth2onnx.py new file mode 100755 index 0000000000000000000000000000000000000000..f6b3739b1e9c66ecf98e8e712e4580293e6eee49 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/script/pth2onnx.py @@ -0,0 +1,17 @@ +# Copyright (c) ModelZoo. 2025-2025. All rights reserved. +# +# This file is part of ModelZoo, licensed under the Apache License, Version 2.0# (the "License"); you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. 
+ +from ultralytics import YOLO + +# 加载预训练模型 +model = YOLO("../model/fire_detectionV1.pt") +model.eval() +model.export(format="onnx", dynamic=False, imgsz=(384, 640), opset=13) diff --git a/samples/samples_GPL/built-in/fire_detection/src/CMakeLists.txt b/samples/samples_GPL/built-in/fire_detection/src/CMakeLists.txt new file mode 100755 index 0000000000000000000000000000000000000000..d1228ed4620b9944c93fe3c4b30e9ed1c493cfa2 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/src/CMakeLists.txt @@ -0,0 +1,36 @@ +# Copyright (c) ModelZoo. 2025-2025. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +cmake_minimum_required(VERSION 3.5.1) + +project(ACL_INFER_SAMPLE) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../out") + +set(COMMON_PATH "../../../common/") + +add_subdirectory(${COMMON_PATH} ./common_build) + +include_directories( + ${COMMON_PATH}/infer/platform + ${COMMON_PATH}/include +) + +link_directories( + ${LIB_PATH} +) + +add_executable(main main.cpp) +target_link_libraries(main infer_common) +install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/samples/samples_GPL/built-in/fire_detection/src/acl.json b/samples/samples_GPL/built-in/fire_detection/src/acl.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/src/acl.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/samples/samples_GPL/built-in/fire_detection/src/main.cpp b/samples/samples_GPL/built-in/fire_detection/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..10083c19c54f30433e31cdc1b257deb0a8a8bbdb --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/src/main.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) ModelZoo. 2025-2025. All rights reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include "model.h" +#include "log.h" +#include +#include +#include +#include +#include +#include +#include "utils.h" + +using namespace Infer; +using namespace std; + +// Entry point: parses the command line, initializes the runtime, loads the model, runs inference on the input list and releases everything. +// Returns 0 on success and -1 on any failure, so calling scripts can detect a failed run from the exit code. +int main(int argc, char *argv[]) +{ + InferParam inferParam; + if (!ParseParamFromCmd(argc, argv, inferParam)) { + LOG(ERROR) << "fail to parse cmd"; + return -1; + } + EnvInit(inferParam.aclConfigPath); + std::unique_ptr model = std::make_unique(); + if (model->Load(inferParam.omModelPath, FireDetection) != 0) { + LOG(ERROR) << "fail to load model"; + EnvDeinit(); + return -1; + } + auto ret = model->Infer(inferParam.imglistPath, JsonFile); + if (ret.size() == 0) { + LOG(ERROR) << "fail to infer model"; + model->Unload(); + EnvDeinit(); + return -1; + } + ret.clear(); + ret.shrink_to_fit(); + if (model->Unload() != 0) { + LOG(ERROR) << "fail to unload model"; + EnvDeinit(); + return -1; + } + EnvDeinit(); + return 0; +} diff --git a/samples/samples_GPL/built-in/fire_detection/train/fire_data.yaml b/samples/samples_GPL/built-in/fire_detection/train/fire_data.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34c9f401c4794a55878a23d8703199c3880dfad9 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/train/fire_data.yaml @@ -0,0 +1,31 @@ +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license + +# COCO128 dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics +# Documentation: https://docs.ultralytics.com/datasets/detect/coco/ +# Example usage: yolo train data=coco128.yaml +# parent +# ├── ultralytics +# └── datasets +# └── coco128 ← downloads here (7 MB) + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ~/datasets/fire_detection # dataset root dir +train: + - fasdd/train/images + - DFS/train/images + - D-fire/train/images + - S2TLD/S2TLD_1080x1920/JPEGImages/images + - Lamp_detection/train/images + + +val: + - fasdd/val/images + - DFS/val/images + - D-fire/val/images + +test: + - fasdd/test/images + +# Classes +names: + 0: fire diff --git a/samples/samples_GPL/built-in/fire_detection/train/fire_model.yaml b/samples/samples_GPL/built-in/fire_detection/train/fire_model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d3b64185629b8a9c82dc51f00b6f2d4392ee636 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/train/fire_model.yaml @@ -0,0 +1,55 @@ +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLO11 object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolo11 +# Task docs: https://docs.ultralytics.com/tasks/detect + +# Parameters +nc: 1 # number of classes +depth_multiple: 0.5 +width_multiple: 0.25 +scales: # model compound scaling constants, i.e. 
'model=yolo11n.yaml' will call yolo11.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # summary: 181 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs + s: [0.50, 0.50, 1024] # summary: 181 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs + m: [0.50, 1.00, 512] # summary: 231 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs + l: [1.00, 1.00, 512] # summary: 357 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs + x: [1.00, 1.50, 512] # summary: 357 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs + +# YOLO11n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 2, C3k2, [192, False, 0.25]] + - [-1, 1, Conv, [192, 3, 2]] # 3-P3/8 + - [-1, 2, C3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2]] # 5-P4/16 + - [-1, 2, C3k2, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32 + - [-1, 2, C3k2, [512, True]] + - [-1, 1, SPPF, [512, 5]] # 9 + - [-1, 2, C2PSA, [512]] # 10 + +# YOLO11n head +head: + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 2, C3k2, [256, False]] # 13 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 2, C3k2, [125, False]] # 16 (P3/8-small) + - [-1, 1, ChannelAttention, [32]] + + - [-1, 1, Conv, [128, 3, 2]] + - [[-1, 13], 1, Concat, [1]] # cat head P4 + - [-1, 2, C3k2, [256, False]] # 20 (P4/16-medium) + - [-1, 1, ChannelAttention, [64]] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 10], 1, Concat, [1]] # cat head P5 + - [-1, 2, C3k2, [512, True]] # 24 (P5/32-large) + - [-1, 1, ChannelAttention, [128]] + + - [[17, 21, 25], 1, Detect, [nc]] # Detect(P3, P4, P5) diff --git a/samples/samples_GPL/built-in/fire_detection/train/fire_train.yaml b/samples/samples_GPL/built-in/fire_detection/train/fire_train.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f682ccfa7d90657c672b8be833d603fad6d287d7 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/train/fire_train.yaml @@ -0,0 +1,133 @@ +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license + +# Global configuration YAML with settings and hyperparameters for YOLO training, validation, prediction and export +# For documentation see https://docs.ultralytics.com/usage/cfg/ + +task: detect # (str) YOLO task, i.e. detect, segment, classify, pose, obb +mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark + +# Train settings ------------------------------------------------------------------------------------------------------- +model: fire_model.yaml # (str, optional) path to model file, i.e. yolov8n.pt or yolov8n.yaml +data: fire_data.yaml # (str, optional) path to data file, i.e. coco8.yaml +epochs: 300 # (int) number of epochs to train for +time: # (float, optional) max hours to train; overrides epochs if set +patience: 100 # (int) early stop after N epochs without val improvement +batch: 16 # (int | float) batch size as int (e.g. 
16), or float 0.0–1.0 for AutoBatch fraction of GPU memory +imgsz: [384, 640] # (int | list) train/val use int (square); predict/export may use [h,w] +save: True # (bool) save train checkpoints and predict results +save_period: 10 # (int) save checkpoint every N epochs; disabled if < 1 +cache: True # (bool | str) cache images in RAM (True/'ram') or on 'disk' to speed dataloading; False disables +device: # (int | str | list) device: 0 or [0,1,2,3] for CUDA, 'cpu'/'mps', or -1/[-1,-1] to auto-select idle GPUs +workers: 8 # (int) dataloader workers (per RANK if DDP) +project: # (str, optional) project name for results root +name: fire_detection_train # (str, optional) experiment name; results in 'project/name' +exist_ok: False # (bool) overwrite existing 'project/name' if True +pretrained: True # (bool | str) use pretrained weights (bool) or load weights from path (str) +optimizer: SGD # (str) optimizer: SGD, MuSGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, or auto +verbose: True # (bool) print verbose logs during training/val +seed: 0 # (int) random seed for reproducibility +deterministic: True # (bool) enable deterministic ops; reproducible but may be slower +single_cls: False # (bool) treat all classes as a single class +rect: False # (bool) rectangular batches for train; rectangular batching for val when mode='val' +cos_lr: True # (bool) cosine learning rate scheduler +close_mosaic: 10 # (int) disable mosaic augmentation for final N epochs (0 to keep enabled) +resume: False # (bool) resume training from last checkpoint in the run dir +amp: True # (bool) Automatic Mixed Precision (AMP) training; True runs AMP capability check +fraction: 1.0 # (float) fraction of training dataset to use (1.0 = all) +profile: False # (bool) profile ONNX/TensorRT speeds during training for loggers +freeze: # (int | list, optional) freeze first N layers (int) or specific layer indices (list) +multi_scale: 0.0 # (float) multi-scale range as a fraction of imgsz; sizes are rounded to 
stride multiples +compile: False # (bool | str) enable torch.compile() backend='inductor'; True="default", False=off, or "default|reduce-overhead|max-autotune-no-cudagraphs" + +# Segmentation +overlap_mask: True # (bool) merge instance masks into one mask during training (segment only) +mask_ratio: 4 # (int) mask downsample ratio (segment only) + +# Classification +dropout: 0.0 # (float) dropout for classification head (classify only) + +# Val/Test settings ---------------------------------------------------------------------------------------------------- +val: True # (bool) run validation/testing during training +split: val # (str) dataset split to evaluate: 'val', 'test' or 'train' +save_json: False # (bool) save results to COCO JSON for external evaluation +conf: # (float, optional) confidence threshold; defaults: predict=0.25, val=0.001 +iou: 0.7 # (float) IoU threshold used for NMS +max_det: 300 # (int) maximum number of detections per image +half: False # (bool) use half precision (FP16) if supported +dnn: False # (bool) use OpenCV DNN for ONNX inference +plots: True # (bool) save plots and images during train/val +end2end: # (bool, optional) whether to use end2end head (YOLO26, YOLOv10) for predict/val/export + +# Predict settings ----------------------------------------------------------------------------------------------------- +source: # (str, optional) path/dir/URL/stream for images or videos; e.g. 'ultralytics/assets' or '0' for webcam +vid_stride: 1 # (int) read every Nth frame for video sources +stream_buffer: False # (bool) True buffers all frames; False keeps the most recent frame for low-latency streams +visualize: False # (bool) visualize model features (predict) or TP/FP/FN confusion (val) +augment: False # (bool) apply test-time augmentation during prediction +agnostic_nms: False # (bool) class-agnostic NMS +classes: # (int | list[int], optional) filter by class id(s), e.g. 
0 or [0,2,3] +retina_masks: False # (bool) use high-resolution segmentation masks (segment) +embed: # (list[int], optional) return feature embeddings from given layer indices + +# Visualize settings --------------------------------------------------------------------------------------------------- +show: False # (bool) show images/videos in a window if supported +save_frames: False # (bool) save individual frames from video predictions +save_txt: False # (bool) save results as .txt files (xywh format) +save_conf: False # (bool) save confidence scores with results +save_crop: False # (bool) save cropped prediction regions to files +show_labels: True # (bool) draw class labels on images, e.g. 'person' +show_conf: True # (bool) draw confidence values on images, e.g. '0.99' +show_boxes: True # (bool) draw bounding boxes on images +line_width: # (int, optional) line width of boxes; auto-scales with image size if not set + +# Export settings ------------------------------------------------------------------------------------------------------ +format: torchscript # (str) target format, e.g. torchscript|onnx|openvino|engine|coreml|saved_model|pb|tflite|edgetpu|tfjs|paddle|mnn|ncnn|imx|rknn|executorch|axelera +keras: False # (bool) TF SavedModel only (format=saved_model); enable Keras layers during export +optimize: False # (bool) TorchScript only; apply mobile optimizations to the scripted model +int8: False # (bool) INT8/PTQ where supported (openvino, engine, coreml, tflite, tfjs, mnn, imx, axelera); needs calibration data/fraction +dynamic: False # (bool) dynamic shapes for torchscript, onnx, openvino, engine, coreml; enable variable image sizes +simplify: True # (bool) ONNX/engine only; run graph simplifier for cleaner ONNX before runtime conversion +opset: # (int, optional) ONNX/engine only; opset version for export; leave unset to use a tested default +workspace: # (float, optional) engine (TensorRT) only; workspace size in GiB, e.g. 
4 +nms: False # (bool) fuse NMS into exported model when backend supports; if True, conf/iou apply (agnostic_nms except coreml) + +# Hyperparameters ------------------------------------------------------------------------------------------------------ +lr0: 0.01 # (float) initial learning rate (SGD=1e-2, Adam/AdamW=1e-3) +lrf: 0.01 # (float) final LR fraction; final LR = lr0 * lrf +momentum: 0.937 # (float) SGD momentum or Adam beta1 +weight_decay: 0.0005 # (float) weight decay (L2 regularization) +warmup_epochs: 3.0 # (float) warmup epochs (fractions allowed) +warmup_momentum: 0.8 # (float) initial momentum during warmup +warmup_bias_lr: 0.1 # (float) bias learning rate during warmup +box: 7.5 # (float) box loss gain +cls: 0.5 # (float) classification loss gain +dfl: 1.5 # (float) distribution focal loss gain +pose: 12.0 # (float) pose loss gain (pose tasks) +kobj: 1.0 # (float) keypoint objectness loss gain (pose tasks) +rle: 1.0 # (float) rle loss gain (pose tasks) +angle: 1.0 # (float) oriented angle loss gain (obb tasks) +nbs: 64 # (int) nominal batch size used for loss normalization +hsv_h: 0.015 # (float) HSV hue augmentation fraction +hsv_s: 0.7 # (float) HSV saturation augmentation fraction +hsv_v: 0.4 # (float) HSV value (brightness) augmentation fraction +degrees: 0.0 # (float) rotation degrees (+/-) +translate: 0.1 # (float) translation fraction (+/-) +scale: 0.5 # (float) scale gain (+/-) +shear: 0.0 # (float) shear degrees (+/-) +perspective: 0.0 # (float) perspective fraction (0–0.001 typical) +flipud: 0.0 # (float) vertical flip probability +fliplr: 0.5 # (float) horizontal flip probability +bgr: 0.0 # (float) RGB↔BGR channel swap probability +mosaic: 1.0 # (float) mosaic augmentation probability +mixup: 0.0 # (float) MixUp augmentation probability +cutmix: 0.0 # (float) CutMix augmentation probability +copy_paste: 0.0 # (float) segmentation copy-paste probability +copy_paste_mode: flip # (str) copy-paste strategy for segmentation: flip or mixup 
+auto_augment: randaugment # (str) classification auto augmentation policy: randaugment, autoaugment, augmix +erasing: 0.4 # (float) random erasing probability for classification (0.0–1.0) + +# Custom config.yaml --------------------------------------------------------------------------------------------------- +cfg: # (str, optional) path to a config.yaml that overrides defaults + +# Tracker settings ------------------------------------------------------------------------------------------------------ +tracker: botsort.yaml # (str) tracker config file: botsort.yaml or bytetrack.yaml diff --git a/samples/samples_GPL/built-in/fire_detection/train/prune.ipynb b/samples/samples_GPL/built-in/fire_detection/train/prune.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..53a3ebcc7b3c64af8a3c757f9e387c4cf0772f14 --- /dev/null +++ b/samples/samples_GPL/built-in/fire_detection/train/prune.ipynb @@ -0,0 +1,542 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ea19f27b", + "metadata": {}, + "source": [ + "# 火焰检测模型剪枝\n", + "使用已经训练完成的网络,对网络进行结构化剪枝,使用yolo11n作为示例" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d26fb45", + "metadata": {}, + "outputs": [], + "source": [ + "from ultralytics import YOLO\n", + "import torch\n", + "import copy\n", + "\n", + "model_path = \"../model/yolo11n.pt\"\n", + "yolo = YOLO(model_path)\n", + "yolo.info()\n", + "yolo_copy =copy.deepcopy(yolo)\n", + "yolo_copy.val(data='fire_data.yaml', split=\"test\", device=[0], imgsz=(384, 640))" + ] + }, + { + "cell_type": "markdown", + "id": "7995d6ee", + "metadata": {}, + "source": [ + "当前模型在fasdd-test集上的mAP50为:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdce666c", + "metadata": {}, + "outputs": [], + "source": [ + "print(yolo.model) # 打印模型结构" + ] + }, + { + "cell_type": "markdown", + "id": "d8ee62a6", + "metadata": {}, + "source": [ + "## 可剪枝层分析\n", + "我们仅仅对卷积层进行剪枝,分析YOLO11结构中可剪枝部分: \n", + 
"1、跨模块:上层Conv连接下层Conv/C3k2.cv1.Conv \n", + "2、模块内:C3k2中Bottleneck层中卷积层连接:Bottleneck.cv1.Conv与Bottleneck.cv2.Conv \n", + "3、模块内:C3k2中多层Bottleneck层间的连接,如m.0.Bottleneck.cv2.Conv和m.1.Bottleneck.cv1 \n", + "4、模块内:C3k2中C3k中cv1.Conv和cv2.Conv与cv3.Conv中连接层 \n", + "5、模块内:C3k2层中存在split操作,cv1.Conv与cv2.Conv无法裁剪,可以将C3k2替换成C3k2_v2,后裁剪;C3k2_v2可裁剪cv0.Conv、cv1.Conv与cv2.Conv连接层 \n", + "6、跨模块:上层C3k2中的输出卷积层C3k2.cv2.Conv连接下层卷积层Conv \n", + "7、模块内:SPPF内部cv1输出与cv2连接部分 \n", + "8、模块内:Detect头class/Bbox,紧邻内部conv分支 " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc29f0c2", + "metadata": {}, + "outputs": [], + "source": [ + "from ultralytics.nn.modules import Bottleneck, Conv, C2f, C3k2, SPPF, Detect\n", + "import torch\n", + "from torch.nn.modules.container import Sequential\n", + "import copy\n", + "import os\n", + "\n", + "class Pruner:\n", + " \"\"\"\n", + " 原生 YOLO 模型网络剪枝器。\n", + " 这套实现抛弃了第三方库,纯原生使用权重 slicing 来手动剪减张量。\n", + " 内建了如针对残差Bottleneck结构的保护等专业功能。\n", + " \"\"\"\n", + " def __init__(self):\n", + " pass\n", + "\n", + " def get_keep_indices(self, conv: Conv, keeping_rate: float, mode='L2'):\n", + " \"\"\"基于 L2 范数或 BN 的 Gamma 值做重要性判断并返回保留索引\"\"\"\n", + " if mode == 'gamma':\n", + " gamma = conv.bn.weight.data.detach()\n", + " channels = len(gamma)\n", + " keep_channels = max(1, int(channels * keeping_rate))\n", + " _, top_inds = torch.topk(gamma.abs(), k=keep_channels)\n", + " elif mode == 'L2':\n", + " weight = conv.conv.weight.data.detach()\n", + " norms_sq = (weight ** 2).sum(dim=[1, 2, 3])\n", + " channels = len(norms_sq)\n", + " keep_channels = max(1, int(channels * keeping_rate))\n", + " _, indices = torch.sort(norms_sq, descending=True)\n", + " top_inds = indices[:keep_channels]\n", + " else:\n", + " raise ValueError(\"mode 必须是 'L2' 或 'gamma'\")\n", + "\n", + " print(f\"[*] 通道评估 => 原: {channels} 期望保留: {keep_channels} (比率: {keeping_rate:.2f})\")\n", + " return top_inds.tolist(), keep_channels\n", + "\n", + " def prune_conv_pair(self, conv1: Conv, conv2: 
Conv, keeping_rate: float, mode='L2'):\n", + " \"\"\"\n", + " 单向标准剪枝:剪掉 conv1 输出和 conv2 输入对应的同等索引通道。\n", + " 适用于上下游没有受到 Add/Concat 的硬依赖时。\n", + " \"\"\"\n", + " keep_idxs, keep_channels = self.get_keep_indices(conv1, keeping_rate, mode)\n", + "\n", + " # 处理第一层的输出端相关权重(Batch Norm + 卷积)\n", + " conv1.bn.weight.data = conv1.bn.weight.data[keep_idxs]\n", + " conv1.bn.bias.data = conv1.bn.bias.data[keep_idxs]\n", + " conv1.bn.running_var.data = conv1.bn.running_var.data[keep_idxs]\n", + " conv1.bn.running_mean.data = conv1.bn.running_mean.data[keep_idxs]\n", + " conv1.bn.num_features = keep_channels\n", + "\n", + " conv1.conv.weight.data = conv1.conv.weight.data[keep_idxs]\n", + " if conv1.conv.bias is not None:\n", + " conv1.conv.bias.data = conv1.conv.bias.data[keep_idxs]\n", + " conv1.conv.out_channels = keep_channels\n", + "\n", + " # 处理第二层的输入端映射\n", + " conv2.conv.in_channels = keep_channels\n", + " conv2.conv.weight.data = conv2.conv.weight.data[:, keep_idxs]\n", + " if conv2.conv.bias is not None:\n", + " # bias of conv2 normally isn't related to in_channels, but keep as is natively\n", + " pass\n", + "\n", + "pruner = Pruner()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4849bc1", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "def plot_sensitivity_curve(sparsities, accuracies):\n", + " prune_ratio = [1-x for x in sparsities]\n", + "\n", + " # 裁剪前的准确率,即未剪枝时的准确率,替换为您实际跑出来的准确率\n", + " base_accuracy = 0.624\n", + "\n", + " # 创建图形\n", + " plt.figure(figsize=(10, 6))\n", + "\n", + " # 绘制剪枝率与准确率的关系折线图\n", + " plt.plot(prune_ratio, accuracies, 'o-', color='#1f77b4', linewidth=2, markersize=8, label='accuracy after prune')\n", + "\n", + " # 绘制裁剪前的准确率基准线(红色水平线)\n", + " plt.axhline(y=base_accuracy, color='r', linestyle='--', linewidth=2, label='base_accuracy')\n", + "\n", + " # 添加数据标签\n", + " for i, acc in enumerate(accuracies):\n", + " 
plt.annotate(f'{acc:.2f}', (prune_ratio[i], accuracies[i]),\n",
+    "                     textcoords=\"offset points\", xytext=(0,10), ha='center')\n",
+    "\n",
+    "    # 设置图标标题和标签\n",
+    "    plt.title('Sensitivity Curves: Validation Accuracy vs. Pruning Sparsity', fontsize=14)\n",
+    "    plt.xlabel('pruning ratio', fontsize=12)\n",
+    "    plt.ylabel('mAP50', fontsize=12)\n",
+    "    plt.grid(True, linestyle='--', alpha=0.7)\n",
+    "\n",
+    "    # 设置x轴范围\n",
+    "    plt.xlim(-0.05, max(prune_ratio) * 1.1)\n",
+    "    plt.ylim(0, max(accuracies) * 1.2)\n",
+    "\n",
+    "    # 添加图例\n",
+    "    plt.legend(loc='lower left')\n",
+    "\n",
+    "    # 显示图片\n",
+    "    plt.tight_layout()\n",
+    "    plt.show()\n",
+    "\n",
+    "def plot_multi_sensitivity_curve(sparsities, accuracies, sub_curves_name):\n",
+    "    \"\"\"Plot one accuracy-vs-pruning-ratio curve per entry of `accuracies`.\n",
+    "\n",
+    "    sparsities: keeping rates shared by all curves (pruning ratio = 1 - keeping rate).\n",
+    "    accuracies: list of curves; accuracies[i][j] is the metric of curve i at sparsities[j].\n",
+    "    sub_curves_name: legend label of each curve, in the same order as `accuracies`.\n",
+    "    \"\"\"\n",
+    "    base_accuracy = 0.624\n",
+    "    prune_ratio = [1-x for x in sparsities]\n",
+    "    sub_line_count = len(accuracies)\n",
+    "\n",
+    "    # Create the figure\n",
+    "    plt.figure(figsize=(12, 8))\n",
+    "\n",
+    "    # One colour per curve; line styles cycle when there are more curves than styles\n",
+    "    colors = plt.cm.tab10(np.linspace(0, 1, sub_line_count))\n",
+    "    line_styles = ['-', '--', '-.', ':']\n",
+    "\n",
+    "    # Accuracy before pruning as a red dashed horizontal baseline\n",
+    "    plt.axhline(y=base_accuracy, color='r', linestyle='--', linewidth=2, label='base_accuracy')\n",
+    "\n",
+    "    for i in range(sub_line_count):\n",
+    "        # Draw the curve itself so the legend gets an entry for it\n",
+    "        plt.plot(prune_ratio, accuracies[i], marker='o', color=colors[i],\n",
+    "                 linestyle=line_styles[i % len(line_styles)], linewidth=2, markersize=6,\n",
+    "                 label=sub_curves_name[i])\n",
+    "        # Label every point; x is indexed by the point (j), not by the curve (i)\n",
+    "        for j, acc in enumerate(accuracies[i]):\n",
+    "            plt.annotate(f'{acc:.2f}',\n",
+    "                         (prune_ratio[j], acc),\n",
+    "                         textcoords='offset points',\n",
+    "                         xytext=(0, 10),\n",
+    "                         ha='center',\n",
+    "                         fontsize=9)\n",
+    "\n",
+    "    # Title and axis labels\n",
+    "    plt.title('Sensitivity Curves: Validation Accuracy vs. Pruning Sparsity', fontsize=16, pad=20)\n",
+    "    plt.xlabel('pruning ratio', fontsize=14, labelpad=10)\n",
+    "    plt.ylabel('mAP50', fontsize=14, labelpad=10)\n",
+    "    plt.grid(True, linestyle='--', alpha=0.7)\n",
+    "\n",
+    "    # Axis ranges\n",
+    "    plt.xlim(-0.05, max(prune_ratio) * 1.1)\n",
+    "    plt.ylim(0, max([max(acc) for acc in accuracies]) * 1.2)\n",
+    "\n",
+    "    # Legend\n",
+    "    plt.legend(loc='upper right', bbox_to_anchor=(1.0, 1.0), ncol=2, fontsize=10, frameon=True)\n",
+    "\n",
+    "    # Show the figure, leaving room for the legend\n",
+    "    plt.tight_layout(rect=[0, 0, 0.85, 1])\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "86a6f5f5",
+   "metadata": {},
+   "source": [
+    "## 不同裁剪方案对比(gamma vs L2)\n",
+    "\n",
+    "# 一、network_slimming\n",
+    "network_slimming技术:使用bn层Gamma因子对channel进行排序,裁掉重要性低的通道\n",
+    "裁掉第0层的卷积层,看看精度下降多少"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d32ab5c8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gamma_accuracies = []\n",
+    "sparsities = [0.875, 0.75, 0.5, 0.25]\n",
+    "for i, sparsity in enumerate(sparsities):\n",
+    "    yolo_copy = copy.deepcopy(yolo)\n",
+    "    model_seq = yolo_copy.model.model\n",
+    "    pruner.prune_conv_pair(model_seq[0], model_seq[1], sparsity, mode='gamma')\n",
+    "    result = yolo_copy.val(data='fire_data.yaml', split='test', device=[0])\n",
+    "    # map50, not map (mAP50-95): the plots compare against a mAP50 baseline\n",
+    "    gamma_accuracies.append(result.box.map50)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9967e028",
+   "metadata": {},
+   "source": [
+    "## 二、L2范数裁剪\n",
+    "使用卷积核的L2范数确定裁剪通道,同样使用第0个通道作实验"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0d05929",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "l2_accuracies = []\n",
+    "sparsities = [0.875, 0.75, 0.5, 0.25]\n",
+    "for i, sparsity in enumerate(sparsities):\n",
+    "    yolo_copy = copy.deepcopy(yolo)\n",
+    "    seq = yolo_copy.model.model\n",
+    "    pruner.prune_conv_pair(seq[0], seq[1], sparsity, \"L2\")\n",
+    "    result = yolo_copy.val(data='fire_data.yaml', split='test', device=[0])\n",
+    "    # map50, not map (mAP50-95): the plots compare against a mAP50 baseline\n",
+    "    l2_accuracies.append(result.box.map50)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1e1c2ef5",
+   "metadata": {},
+   "source": [
+    "## 三、方案对比与原因分析\n",
+    "对比两种不同裁剪方案:仅仅对第0层"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8025b98",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compare_accuracies = []\n",
+    "compare_accuracies.append(gamma_accuracies)\n",
+    "compare_accuracies.append(l2_accuracies)\n",
+    "plot_multi_sensitivity_curve(sparsities, compare_accuracies, [\"gamma prune\", \"l2 prune\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3da9aba7",
+   "metadata": {},
+   "source": [
+    "对于我们网络的第零层,对于gamma因子和L2范数裁剪,L2范数裁剪明显优于gamma因子裁剪,我们可以分析一下网络卷积层gamma因子分布与weight分布对比"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bdd5e27e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "yolo_copy = copy.deepcopy(yolo)\n",
+    "\n",
+    "conv_weights = []\n",
+    "bn_gammas = []\n",
+    "\n",
+    "# Walk over all model parameters\n",
+    "for name, param in yolo_copy.model.model.named_parameters():\n",
+    "    # Convolution kernels\n",
+    "    if 'conv' in name and 'weight' in name:\n",
+    "        weights = param.detach().view(-1).cpu()\n",
+    "        conv_weights.append(weights)\n",
+    "\n",
+    "    # BN gamma (BatchNorm2d weight)\n",
+    "    elif 'bn' in name and 'weight' in name:\n",
+    "        gamma = param.detach().view(-1).cpu()\n",
+    "        bn_gammas.append(gamma)\n",
+    "\n",
+    "# Flatten each kind into a single array\n",
+    "conv_weights = torch.cat(conv_weights).numpy()\n",
+    "bn_gammas = torch.cat(bn_gammas).numpy()\n",
+    "\n",
+    "# Two side-by-side subplots\n",
+    "fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n",
+    "\n",
+    "# Convolution weight distribution (left subplot)\n",
+    "axes[0].hist(conv_weights, bins=256, density=True,\n",
+    "             color='blue', alpha=0.7, edgecolor='black')\n",
+    "axes[0].set_title('Convolutional Layer Weights Distribution', fontsize=14)\n",
+    "axes[0].set_xlabel(\"Weight Value\", fontsize=12)\n",
+    "axes[0].set_ylabel(\"Density\", fontsize=12)\n",
+    "axes[0].grid(True, linestyle='--', alpha=0.7)\n",
+    "\n",
+    "# BN gamma distribution (right subplot)\n",
+    "axes[1].hist(bn_gammas, bins=256, density=True,\n",
+    "             color='blue', alpha=0.7, edgecolor='black')\n",
+    "axes[1].set_title('BN Gamma Distribution', fontsize=14)\n",
+    "axes[1].set_xlabel(\"Gamma Value\", fontsize=12)\n",
+    "axes[1].set_ylabel(\"Density\", fontsize=12)\n",
+    "axes[1].grid(True, linestyle='--', alpha=0.7)\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.suptitle('Weight and Bn gamma Distribution Analysis', fontsize=16, y=1.02)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bb3c4c31",
+   "metadata": {},
+   "source": [
+    "观察网络中BN层gamma的分布和所有卷积层中weight的分布,可以看出,weight大部分是集中在0附近的,这个与我们的正则化训练有关。同时由于weight这种0附近正态分布特性,有很多冗余weight,在weight为0时,该weight对后续层的推理结果贡献很小,是我们裁剪的原理。gamma则没有这种集中在0附近的分布,所以在我们Gamma的L1裁剪不如weight的L2裁剪效果,当然我们可以遵循network slimming原论文方法进行L1稀疏化训练来改变Gamma分布使其在0附近有一个尖峰,来提升L1裁剪的效果"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1346e2c8",
+   "metadata": {},
+   "source": [
+    "## C3k2中bottelneck层裁剪\n",
+    "以C3k2中bottelneck层为例,其他层可以仿造该代码来裁剪"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f219942d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "c3k2_accuracies = []\n",
+    "sparsities = [0.875, 0.75, 0.5, 0.25]\n",
+    "for i in [2, 4, 6, 8]:\n",
+    "    accuracy = []\n",
+    "    for _, sparsity in enumerate(sparsities):\n",
+    "        yolo_copy = copy.deepcopy(yolo)\n",
+    "        seq = yolo_copy.model.model\n",
+    "        for name, m in seq[i].named_modules():\n",
+    "            if isinstance(m, Bottleneck):\n",
+    "                pruner.prune_conv_pair(m.cv1, m.cv2, sparsity, \"L2\")\n",
+    "        result = yolo_copy.val(data='fire_data.yaml', split='test', device=[0])\n",
+    "        # map50, not map (mAP50-95): the plots compare against a mAP50 baseline\n",
+    "        accuracy.append(result.box.map50)\n",
+    "    c3k2_accuracies.append(accuracy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "98bd7041",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_multi_sensitivity_curve(sparsities, c3k2_accuracies, [\"module2_bottleneck\", \"module4_bottleneck\", \"module6_bottleneck\", \"module8_bottleneck\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "11cbcc15",
+   "metadata": {},
+   "source": [
+    "对每个可裁剪层做类似敏感度分析,然后给出每层最佳剪枝比例(如:精度掉点不超过百分之5),使用pruner对每层进行剪枝,剪枝完成后进行多轮重训"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "finetune_1",
+   "metadata": {},
+   "source": [
+    "## 四、多轮全局结构化剪枝与微调重训 (Fine-tuning)\n",
+    "前面基于各层的敏感度评估,可以得出哪些层能以何种比例丢弃。现在,我们将其应用于整个网络,然后重启训练循环,这是保证模型掉点后能够通过重新反向传播训练弥补损失的**必不可少**环节。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "code_f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 1. Define the pruning plan and apply it to the whole network\n",
+    "print('========== 启动全局底层自动裁剪 ==========')\n",
+    "yolo_pruned = copy.deepcopy(yolo)\n",
+    "seq = yolo_pruned.model.model\n",
+    "prune_mgr = Pruner()\n",
+    "\n",
+    "# One keeping rate per prunable layer, chosen from the sensitivity analysis above;\n",
+    "# the values below are for reference only\n",
+    "\n",
+    "# ---- backbone ----\n",
+    "# Conv -> Conv: prune_conv_pair takes the Conv modules themselves (they expose .conv/.bn)\n",
+    "prune_mgr.prune_conv_pair(seq[0], seq[1], keeping_rate=0.875)\n",
+    "# Conv -> following C3k2 (its cv1 consumes the Conv output).\n",
+    "# Rates are paired with layers via zip; indexing the rate list by layer number is wrong.\n",
+    "for i, rate in zip([1, 3, 5, 7], [0.875, 0.75, 0.5, 0.5]):\n",
+    "    prune_mgr.prune_conv_pair(seq[i], seq[i + 1].cv1, rate)\n",
+    "# C3k2 output conv (cv2) -> input conv of the next layer\n",
+    "# (layer 3 is a plain Conv; layer 9 is SPPF, whose input conv is cv1)\n",
+    "for src, dst, rate in [(seq[2].cv2, seq[3], 0.875), (seq[8].cv2, seq[9].cv1, 0.5)]:\n",
+    "    prune_mgr.prune_conv_pair(src, dst, rate)\n",
+    "# Bottleneck blocks inside the backbone C3k2 layers\n",
+    "for i in range(10):\n",
+    "    for name, m in seq[i].named_modules():\n",
+    "        if isinstance(m, Bottleneck):\n",
+    "            prune_mgr.prune_conv_pair(m.cv1, m.cv2, 0.5)\n",
+    "\n",
+    "# ---- neck ----\n",
+    "# Bottleneck blocks inside the neck C3k2 layers\n",
+    "for i in [13, 16, 19, 22]:\n",
+    "    for name, m in seq[i].named_modules():\n",
+    "        if isinstance(m, Bottleneck):\n",
+    "            prune_mgr.prune_conv_pair(m.cv1, m.cv2, 0.5)\n",
+    "\n",
+    "# ---- head ----\n",
+    "# First two convs of each box-regression branch of the Detect layer\n",
+    "for i in range(3):\n",
+    "    prune_mgr.prune_conv_pair(seq[23].cv2[i][0], seq[23].cv2[i][1], 0.875)\n",
+    "print('========== 底层网络压缩完成 ==========')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "code_f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 2. 
即刻验证因剪枝产生的维度是否能够顺利运行前向推理 (验证阶段会发现掉点现象是正常的)\n", + "print('验证模型连接和初步精度...')\n", + "res = yolo_pruned.val(data='fire_data.yaml', split='test', device=[0])\n", + "print(f'剪裁后未经重训直接推断的 MAP50: {res.box.map50:.5f}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "code_f3", + "metadata": {}, + "outputs": [], + "source": [ + "# 3. 运行模型的重新微调训练 (Finetuning) 来找回精度\n", + "# 提示:如果是极度压缩的多轮剪枝(Iterative Pruning),需要将 剪枝 + 训练 套在一个对于 target_sparsity 梯度逐步递减的 for 热启动大循环外圈中。这里提供单轮标准微调作为核心示范:\n", + "# 不使用yolo_pruned.train(...), 而是手动构建trainer并绕过模型重新构造\n", + "print('========== 开始重训微调阶段 ==========')\n", + "from ultralytics.models.yolo.detect.train import DetectionTrainer\n", + "args = dict(\n", + " model='../model/yolo11n.pt',\n", + " data='fire_data.yaml',\n", + " epochs=25,\n", + " lr0=0.001,\n", + " batch=16,\n", + " device=[0],\n", + " name='fire_pruned_finetune',\n", + ")\n", + "trainer = DetectionTrainer(overrides=args)\n", + "# 将内存中已经剪枝好的模型 DetectionModel(其嵌套在YOLO().model.model中) 传递过去\n", + "trainer.model = yolo_pruned.model\n", + "trainer.train()\n", + "\n", + "# 训练完成后保存\n", + "os.makedirs('../model/pruned_target', exist_ok=True)\n", + "yolo_pruned.save('../model/pruned_target/final_pruned.pt')\n", + "print('========== 完全交付:微调网络已成功保存 ==========')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "fire", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/samples/samples_GPL/built-in/fire_detection/train/yolo.patch b/samples/samples_GPL/built-in/fire_detection/train/yolo.patch new file mode 100644 index 0000000000000000000000000000000000000000..b31d2d9bb0fc375a192914140f8c6f77af61f9d9 --- /dev/null +++ 
b/samples/samples_GPL/built-in/fire_detection/train/yolo.patch @@ -0,0 +1,166 @@ +diff --git a/ultralytics/nn/modules/__init__.py b/ultralytics/nn/modules/__init__.py +index d0e1340e1..180a1b605 100644 +--- a/ultralytics/nn/modules/__init__.py ++++ b/ultralytics/nn/modules/__init__.py +@@ -43,6 +43,7 @@ from .block import ( + C2fPSA, + C3Ghost, + C3k2, ++ C3k2_v2, + C3x, + CBFuse, + CBLinear, +@@ -137,6 +138,7 @@ __all__ = ( + "C2fPSA", + "C3Ghost", + "C3k2", ++ "C3k2_v2", + "C3x", + "CBFuse", + "CBLinear", +diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py +index 20d1734e6..6433c6880 100644 +--- a/ultralytics/nn/modules/block.py ++++ b/ultralytics/nn/modules/block.py +@@ -37,6 +37,7 @@ __all__ = ( + "C2fPSA", + "C3Ghost", + "C3k2", ++ "C3k2_v2", + "C3x", + "CBFuse", + "CBLinear", +@@ -318,6 +319,33 @@ class C2f(nn.Module): + y.extend(m(y[-1]) for m in self.m) + return self.cv2(torch.cat(y, 1)) + ++class C2f_v2(nn.Module): ++ """Faster Implementation of CSP Bottleneck with 2 convolutions.""" ++ ++ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5): ++ """Initialize a CSP bottleneck with 2 convolutions. ++ ++ Args: ++ c1 (int): Input channels. ++ c2 (int): Output channels. ++ n (int): Number of Bottleneck blocks. ++ shortcut (bool): Whether to use shortcut connections. ++ g (int): Groups for convolutions. ++ e (float): Expansion ratio. 
++ """ ++ super().__init__() ++ self.c = int(c2 * e) # hidden channels ++ self.cv0 = Conv(c1, self.c, 1, 1) ++ self.cv1 = Conv(c1, self.c, 1, 1) ++ self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2) ++ self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)) ++ ++ def forward(self, x: torch.Tensor) -> torch.Tensor: ++ """Forward pass through C2f layer.""" ++ y = [self.cv0(x), self.cv1(x)] ++ y.extend(m(y[-1]) for m in self.m) ++ return self.cv2(torch.cat(y, 1)) ++ + + class C3(nn.Module): + """CSP Bottleneck with 3 convolutions.""" +@@ -1105,6 +1133,45 @@ class C3k2(C2f): + for _ in range(n) + ) + ++class C3k2_v2(C2f_v2): ++ """Faster Implementation of CSP Bottleneck with 2 convolutions.""" ++ ++ def __init__( ++ self, ++ c1: int, ++ c2: int, ++ n: int = 1, ++ c3k: bool = False, ++ e: float = 0.5, ++ attn: bool = False, ++ g: int = 1, ++ shortcut: bool = True, ++ ): ++ """Initialize C3k2_v2 module. ++ ++ Args: ++ c1 (int): Input channels. ++ c2 (int): Output channels. ++ n (int): Number of blocks. ++ c3k (bool): Whether to use C3k blocks. ++ e (float): Expansion ratio. ++ attn (bool): Whether to use attention blocks. ++ g (int): Groups for convolutions. ++ shortcut (bool): Whether to use shortcut connections. 
++ """ ++ super().__init__(c1, c2, n, shortcut, g, e) ++ self.m = nn.ModuleList( ++ nn.Sequential( ++ Bottleneck(self.c, self.c, shortcut, g), ++ PSABlock(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1)), ++ ) ++ if attn ++ else C3k(self.c, self.c, 2, shortcut, g) ++ if c3k ++ else Bottleneck(self.c, self.c, shortcut, g) ++ for _ in range(n) ++ ) ++ + + class C3k(C3): + """C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks.""" +diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py +index b9561a2fa..cf8b6c663 100644 +--- a/ultralytics/nn/tasks.py ++++ b/ultralytics/nn/tasks.py +@@ -36,6 +36,7 @@ from ultralytics.nn.modules import ( + C2fPSA, + C3Ghost, + C3k2, ++ C3k2_v2, + C3x, + CBFuse, + CBLinear, +@@ -72,6 +73,7 @@ from ultralytics.nn.modules import ( + YOLOESegment, + YOLOESegment26, + v10Detect, ++ ChannelAttention, + ) + from ultralytics.utils import DEFAULT_CFG_DICT, LOGGER, WINDOWS, YAML, colorstr, emojis + from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml +@@ -1591,6 +1593,7 @@ def parse_model(d, ch, verbose=True): + C2, + C2f, + C3k2, ++ C3k2_v2, + RepNCSPELAN4, + ELAN1, + ADown, +@@ -1617,6 +1620,7 @@ def parse_model(d, ch, verbose=True): + C2, + C2f, + C3k2, ++ C3k2_v2, + C2fAttn, + C3, + C3TR, +@@ -1645,7 +1649,7 @@ def parse_model(d, ch, verbose=True): + if m in base_modules: + c1, c2 = ch[f], args[0] + if c2 != nc: # if c2 != nc (e.g., Classify() output) +- c2 = make_divisible(min(c2, max_channels) * width, 8) ++ c2 = make_divisible(min(c2, max_channels) * width, 4) + if m is C2fAttn: # set 1) embed channels and 2) num heads + args[1] = make_divisible(min(args[1], max_channels // 2) * width, 8) + args[2] = int(max(round(min(args[2], max_channels // 2 // 32)) * width, 1) if args[2] > 1 else args[2]) +@@ -1654,7 +1658,7 @@ def parse_model(d, ch, verbose=True): + if m in repeat_modules: + args.insert(2, n) # number of repeats + n = 1 +- if m is C3k2: # 
for M/L/X sizes ++ if (m is C3k2) or (m is C3k2_v2): # for M/L/X sizes + legacy = False + if scale in "mlx": + args[3] = True diff --git a/samples/samples_GPL/common/cmake/toolchain_aarch64_610_linux.cmake b/samples/samples_GPL/common/cmake/toolchain_aarch64_610_linux.cmake new file mode 100644 index 0000000000000000000000000000000000000000..2a8a5f83c52dd06de6b9447750d8b1f8aefe7ba9 --- /dev/null +++ b/samples/samples_GPL/common/cmake/toolchain_aarch64_610_linux.cmake @@ -0,0 +1,36 @@ +# Copyright (c) ModelZoo. 2025-2025. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR aarch64) + +# 安装cann包 +# export NPU_INCLUDE_PATH=/path/to/ascend-toolkit/latest/acllib/include/acl +# export NPU_LIB_PATH=/path/to/ascend-toolkit/latest/acllib/lib64/stub +# eg: export NPU_INCLUDE_PATH=$HOME/Ascend/ascend-toolkit/svp_latest/acllib/include/acl +# eg:export NPU_LIB_PATH=$HOME/Ascend/ascend-toolkit/svp_latest/acllib/lib32/stub + +# cmake ../src -DSOC_VERSION=SS928V100 -DCMAKE_TOOLCHAIN_FILE=../../../../common/cmake/toolchain_aarch64_linux.cmake + +# 编译器 +set(CMAKE_C_COMPILER arm-v01c02-linux-musleabi-gcc) +set(CMAKE_CXX_COMPILER arm-v01c02-linux-musleabi-gcc) + +# 路径适配 +set(INC_PATH $ENV{NPU_INCLUDE_PATH}) +set(LIB_PATH $ENV{NPU_LIB_PATH}) +set(OPENCV_LIB_PATH "../opensource/opencv/lib/aarch64_610_linux") + +set(CXX_STDLIB stdc++) +set(LINUX_PLATFORM TRUE) diff --git a/samples/samples_GPL/common/include/model.h b/samples/samples_GPL/common/include/model.h index 3ed44c58e7b1f35ab2b091628e09b33bb64455e8..644f0705c3e985cdee53573344b67a785c798bb9 100644 --- a/samples/samples_GPL/common/include/model.h +++ b/samples/samples_GPL/common/include/model.h @@ -53,7 +53,8 @@ enum ModelType { Yolov8sObb, Yolov8s, Yolov9s, - Yolov8sWorld + Yolov8sWorld, + FireDetection }; using ProcessFunc = std::function&, std::vector&, std::vector&)>; diff --git a/samples/samples_GPL/common/infer/model.cpp b/samples/samples_GPL/common/infer/model.cpp index 500aa584a41ab3b55971b69840b1eee3ba42df1a..98d95cb4bebfa936d93e97a5e104ce06afe316b0 100644 --- a/samples/samples_GPL/common/infer/model.cpp +++ b/samples/samples_GPL/common/infer/model.cpp @@ -47,6 +47,8 @@ #include "yolov9s_preprocess.h" #include "yolov8s_world_postprocess.h" #include "yolov8s_world_preprocess.h" +#include "fire_detection_preprocess.h" +#include "fire_detection_postprocess.h" namespace Infer { struct ExecuteParam { @@ -75,7 +77,8 @@ const std::unordered_map> Model:: { ModelType::Yolov8s, { std::bind(Yolov8sPreprocess, std::placeholders::_1, 
std::placeholders::_2, std::placeholders::_3, true), GetYoloV8sBox } }, { ModelType::Yolov5, { std::bind(Yolov7Preprocess, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, true), std::bind(GetYoloV7Box, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, true) } }, { ModelType::Yolov9s, { std::bind(Yolov9sPreprocess, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, true), GetYoloV9sBox } }, - { ModelType::Yolov8sWorld, { std::bind(Yolov8sWorldPreprocess, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, true), GetYoloV8sWorldBox } } + { ModelType::Yolov8sWorld, { std::bind(Yolov8sWorldPreprocess, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, true), GetYoloV8sWorldBox } }, + { ModelType::FireDetection, { FireDetectionPreprocess, GetFireDetectionBox } } }; #else const std::unordered_map> Model::modelTypeToProcessMap_ = { @@ -92,7 +95,8 @@ const std::unordered_map> Model:: { ModelType::Yolov8s, { std::bind(Yolov8sPreprocess, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, false), GetYoloV8sBox } }, { ModelType::Yolov5, { std::bind(Yolov7Preprocess, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, false), std::bind(GetYoloV7Box, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, false) } }, { ModelType::Yolov9s, { std::bind(Yolov9sPreprocess, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, false), GetYoloV9sBox } }, - { ModelType::Yolov8sWorld, { std::bind(Yolov8sWorldPreprocess, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, false), GetYoloV8sWorldBox } } + { ModelType::Yolov8sWorld, { std::bind(Yolov8sWorldPreprocess, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, false), GetYoloV8sWorldBox } }, + { ModelType::FireDetection, { FireDetectionPreprocess, GetFireDetectionBox } } }; #endif @@ -296,4 +300,4 @@ std::vector Model::Infer(std::vector& 
tensors) } return outputs; } -} // namespace Infer \ No newline at end of file +} // namespace Infer diff --git a/samples/samples_GPL/common/infer/post_process/fire_detection_postprocess.cpp b/samples/samples_GPL/common/infer/post_process/fire_detection_postprocess.cpp new file mode 100644 index 0000000000000000000000000000000000000000..500afacf9403d137f09e99fa4d2d1e11d376b30e --- /dev/null +++ b/samples/samples_GPL/common/infer/post_process/fire_detection_postprocess.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) ModelZoo. 2026-2026. All rights reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fire_detection_postprocess.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "log.h" + +namespace Infer { + +static int originWidth = 640; +static int originHeight = 384; + +constexpr int YOLO_INPUT_HEIGHT = 384; +constexpr int YOLO_INPUT_WIDTH = 640; +constexpr int NUM_CLASSES = 80; +constexpr int BBOX_COORD_NUM = 4; // bbox包含坐标的个数(x, y, w, h) +constexpr int BBOX_SIZE = BBOX_COORD_NUM + NUM_CLASSES; // bbox:xywh 模型预测类型的score +constexpr int BBOX_NUM = (YOLO_INPUT_HEIGHT / 8) * (YOLO_INPUT_WIDTH / 8) + + (YOLO_INPUT_HEIGHT / 16) * (YOLO_INPUT_WIDTH / 16) + (YOLO_INPUT_HEIGHT / 32) * (YOLO_INPUT_WIDTH / 32); +constexpr float CONF_THRES = 0.001f; +constexpr float IOU_THRES = 0.7f; +constexpr int YOLO_INPUT_SIZE = 640; +constexpr int BYTE_BIT_NUM = 8; // 1 byte = 8 bit + +// YOLO 输出格式相关常量 +constexpr int BOX_CX_INDEX = 0; +constexpr int BOX_CY_INDEX = 1; +constexpr int BOX_W_INDEX = 2; +constexpr int BOX_H_INDEX = 3; +constexpr float HALF_DIVISOR = 2.0f; + +// 定义检测框结构体 +struct BBox { + float x1, y1, x2, y2; // 左上和右下坐标 + float score; // 置信度 + int classId; // 类别ID +}; + +static BBox ScaleBboxToOriginal(const BBox& box) +{ + BBox boxNew; + float scale = std::min((float)YOLO_INPUT_HEIGHT / originHeight, + (float)YOLO_INPUT_WIDTH / originWidth); + scale = std::min(scale, 1.0f); + float newW = originWidth * scale; + float newH = originHeight * scale; + float dx = (YOLO_INPUT_SIZE - newW) / 2; + float dy = (YOLO_INPUT_SIZE - newH) / 2; + + boxNew.x1 = std::max(0.0f, std::min((box.x1 - dx) / scale, (float)originWidth)); + boxNew.y1 = std::max(0.0f, std::min((box.y1 - dy) / scale, (float)originHeight)); + boxNew.x2 = std::max(0.0f, std::min((box.x2 - dx) / scale, (float)originWidth)); + boxNew.y2 = std::max(0.0f, std::min((box.y2 - dy) / scale, (float)originHeight)); + boxNew.score = box.score; + boxNew.classId = box.classId; + + return boxNew; 
+} + +static void SaveBboxResult(std::vector& outBufs, std::vector bboxs, + const std::string& filePath) +{ + mode_t oldUmask = umask(0); + // 文件名类似:~/img/00001.bin + // 获取保存文件路径和文件名 + size_t start = filePath.find_last_of("/"); + size_t end = filePath.find_last_of("."); + + std::string fileName = filePath.substr(start + 1, end - start - 1); + std::string resultPath = "../out/result"; + std::string binPath = resultPath + "/bin/"; + std::string txtPath = resultPath + "/txt/"; + for (auto& path : { resultPath, binPath, txtPath }) { + struct stat info; + if (stat(path.c_str(), &info) != 0) { + mkdir(path.c_str(), 0777); + } + } + umask(oldUmask); + // 保存bin文件 + std::string binFile = binPath + fileName + "_result.bin"; + std::ofstream file(binFile, std::ios::binary); + if (file.is_open()) { + file.write(static_cast(outBufs[0].GetRawPtr()), outBufs[0].size); + file.close(); + } else { + LOG(ERROR) << "open result bin failed, " << filePath.c_str(); + } + + // 保存bbox结果 + std::string txtFile = txtPath + fileName + "_result.txt"; + std::ofstream file1(txtFile, std::ios::out); + if (file1.is_open()) { + for (auto& box1 : bboxs) { + auto box = ScaleBboxToOriginal(box1); + file1 << "Class " << box.classId << " | Score: " << box.score + << " | Box: [" << box.x1 << ", " << box.y1 << ", " + << box.x2 << ", " << box.y2 << "]\n"; + } + file1.close(); + } else { + LOG(ERROR) << "open result bin failed, " << filePath.c_str(); + } +}; + +// 计算两个框的IoU(交并比) +static float CalculateIoU(const BBox& box1, const BBox& box2) +{ + // 计算交集区域 + float interX1 = std::max(box1.x1, box2.x1); + float interY1 = std::max(box1.y1, box2.y1); + float interX2 = std::min(box1.x2, box2.x2); + float interY2 = std::min(box1.y2, box2.y2); + + // 计算交集面积 + float interArea = std::max(0.0f, interX2 - interX1 + 1) * std::max(0.0f, interY2 - interY1 + 1); + + // 计算各自面积 + float area1 = (box1.x2 - box1.x1 + 1) * (box1.y2 - box1.y1 + 1); + float area2 = (box2.x2 - box2.x1 + 1) * (box2.y2 - box2.y1 + 1); + + // 计算并集面积 + 
float unionArea = area1 + area2 - interArea; + + return interArea / unionArea; +} + + +// NMS主函数 +static std::vector NMS(std::vector& boxes) +{ + std::vector result; + + // 1. 按置信度降序排序 + std::sort(boxes.begin(), boxes.end(), + [](const BBox& a, const BBox& b) { return a.score > b.score; }); + + // 2. 初始化是否保留的标记 + std::vector keep(boxes.size(), true); + + // 3. 遍历所有框 + for (size_t i = 0; i < boxes.size(); ++i) { + if (!keep[i]) + continue; // 已标记移除则跳过 + + // 加入结果集 + result.push_back(boxes[i]); + + // 与后续框计算IoU + for (size_t j = i + 1; j < boxes.size(); ++j) { + if (!keep[j]) + continue; + + // 4. 计算IoU并判断是否移除 + if (CalculateIoU(boxes[i], boxes[j]) > IOU_THRES) { + keep[j] = false; // 标记为移除 + } + } + } + + return result; +} + +// 按类别分组NMS +static std::vector MulticlassNms(std::vector& boxes) +{ + std::vector result; + + // 1. 按类别分组 + std::sort(boxes.begin(), boxes.end(), + [](const BBox& a, const BBox& b) { return a.classId < b.classId; }); + + // 2. 对每个类别单独执行NMS + int currentClass = -1; + std::vector classBoxes; + + for (const auto& box : boxes) { + if (box.classId != currentClass) { + // 处理上一个类别 + if (!classBoxes.empty()) { + auto nmsResult = NMS(classBoxes); + result.insert(result.end(), nmsResult.begin(), nmsResult.end()); + classBoxes.clear(); + } + currentClass = box.classId; + } + classBoxes.push_back(box); + } + + // 处理最后一个类别 + if (!classBoxes.empty()) { + auto nmsResult = NMS(classBoxes); + result.insert(result.end(), nmsResult.begin(), nmsResult.end()); + } + + return result; +} + +static void Transpose(float* data, int rows, int cols) +{ + static thread_local std::vector temp; + int totalElements = rows * cols; + if (temp.size() < totalElements) { + temp.resize(totalElements); + } + + // 执行转置操作 + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < cols; ++j) { + temp[j * rows + i] = data[i * cols + j]; + } + } + memcpy(data, temp.data(), totalElements * sizeof(float)); +} + +static std::vector GetBBox(std::vector& outBufs) +{ + std::vector bboxs; + 
float* data = static_cast(outBufs[0].GetRawPtr()); + + for (int i = 0; i < BBOX_NUM; i++) { + // 提取类别分数 + const float* scores = data + BBOX_COORD_NUM; /* 获取预测类别的概率入口点 */ + int classId = std::max_element(scores, scores + NUM_CLASSES) - scores; + float conf = scores[classId]; + + // 获取x y x y + float x1 = data[BOX_CX_INDEX] - data[BOX_W_INDEX] / HALF_DIVISOR; + float y1 = data[BOX_CY_INDEX] - data[BOX_H_INDEX] / HALF_DIVISOR; + float x2 = data[BOX_CX_INDEX] + data[BOX_W_INDEX] / HALF_DIVISOR; + float y2 = data[BOX_CY_INDEX] + data[BOX_H_INDEX] / HALF_DIVISOR; + + // 置信度过滤 + if (conf >= CONF_THRES) { + bboxs.push_back({ x1, y1, x2, y2, conf, classId }); + } + data += BBOX_SIZE; + } + return bboxs; +} + +static std::vector GetNmsBboxs(std::vector& outBufs) +{ + // yolo网输出的格式为xywh,代表边框中心点的坐标(x, y)和边框宽高(w, h) + std::vector bboxs = std::move(GetBBox(outBufs)); + // 执行NMS + std::vector result = MulticlassNms(bboxs); + return result; +} + +static Result PostProcess(std::vector& outBufs, const std::string& filePath) +{ + // 对矩阵进行转置,从84*8400变为8400*84,方便逐行处理 + float* data = static_cast(outBufs[0].GetRawPtr()); + Transpose(data, BBOX_SIZE, BBOX_NUM); + + // 得到经过nms后的bbox框,选取conf_thres=0.25 IOU_THRES=0.45 + std::vector bboxs = std::move(GetNmsBboxs(outBufs)); + + // 保存结果文件,保存output到bin,保存框结果到txt中 + SaveBboxResult(outBufs, bboxs, filePath); + return SUCCESS; +} + +bool GetFireDetectionBox(std::vector& fileList, + std::vector& tensorBufs, + std::vector& tensorDescs) +{ + cv::Mat img = cv::imread(fileList[0]); + cv::Size imgSize = img.size(); + originWidth = imgSize.width; + originHeight = imgSize.height; + (void)PostProcess(tensorBufs, fileList[0]); + LOG(INFO) << "Finished data postprocess successfully, " << fileList[0]; + return true; +} +} // namespace Infer diff --git a/samples/samples_GPL/common/infer/post_process/fire_detection_postprocess.h b/samples/samples_GPL/common/infer/post_process/fire_detection_postprocess.h new file mode 100644 index 
0000000000000000000000000000000000000000..bb262ae4f267a6b92ebb5f14391cd0ce70548675 --- /dev/null +++ b/samples/samples_GPL/common/infer/post_process/fire_detection_postprocess.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) ModelZoo. 2026-2026. All rights reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "model.h" + +namespace Infer { +bool GetFireDetectionBox(std::vector& fileList, std::vector& tensorBufs, + std::vector& tensorDescs); +} diff --git a/samples/samples_GPL/common/infer/preprocess/fire_detection_preprocess.cpp b/samples/samples_GPL/common/infer/preprocess/fire_detection_preprocess.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a6ce7bbfead1aa3bad9a24f36ef2eddf3febe083 --- /dev/null +++ b/samples/samples_GPL/common/infer/preprocess/fire_detection_preprocess.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) ModelZoo. 2026-2026. All rights reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * + * + */ + +#include "fire_detection_preprocess.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "log.h" + +using json = nlohmann::json; +static constexpr int BYTE_BIT_NUM = 8; +static constexpr int YOLO_INPUT_SIZE = 640; +namespace Infer { + +static Result ReadImgToBufDpico(const cv::Mat& mat, const TensorDesc& desc, + TensorBuf& inBuf) +{ + size_t matTotalBytes = mat.total() * mat.elemSize(); + size_t bufTotalBytes = desc.dims[desc.dimCount - 1] * desc.typeSize / BYTE_BIT_NUM; + for (size_t i = 0; i < desc.dimCount - 1; i++) { + bufTotalBytes *= desc.dims[i]; + } + + char* bufPtr = static_cast(inBuf.GetRawPtr()); + memcpy(bufPtr, mat.data, matTotalBytes); + return SUCCESS; +} + +// Letterbox函数实现 +static cv::Mat LetterBox(const cv::Mat& img, const cv::Size& targetSize, + bool scaleup) +{ + cv::Size shape = img.size(); + + // 计算缩放比例 + double r = std::min(static_cast(targetSize.width) / shape.width, + static_cast(targetSize.height) / shape.height); + r = std::min(r, 1.0); + + // 计算未填充的尺寸 + cv::Size unpad(static_cast(std::round(shape.width * r)), + static_cast(std::round(shape.height * r))); + + // 计算填充量 + double w = (targetSize.width - unpad.width) / 2.0f; + double h = (targetSize.height - unpad.height) / 2.0f; + + cv::Mat resized; + if (shape != unpad) { + cv::resize(img, resized, unpad, 0, 0, cv::INTER_LINEAR); + } else { + resized = img.clone(); + } + // 计算填充边界 + int top = static_cast(std::round(h - 0.1)); + int bottom = static_cast(std::round(h + 0.1)); + int left = static_cast(std::round(w - 0.1)); + int right = static_cast(std::round(w + 0.1)); + + cv::Mat padded; + cv::copyMakeBorder(resized, padded, top, bottom, left, right, + cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114)); + + return padded; +} + +static cv::Mat BGRToNV21(const cv::Mat& src) +{ + int w = src.cols; + int h = src.rows; 
+ + cv::Mat nv21(h + h / 2, w, CV_8UC1); /* 2: nv21格式yuv比例为 4:1:1,yuv数据长度为原图的1.5倍 */ + + // 将 BGR 转换为 I420 (Planar) 格式 + // I420 布局: [Y (w*h)] [U (w*h/4)] [V (w*h/4)] + cv::Mat yuvI420; + cv::cvtColor(src, yuvI420, cv::COLOR_BGR2YUV_I420); + + // 拷贝 Y 分量 (直接拷贝前 h 行) + // nv21(cv::Rect(0, 0, w, h)) 对应 Y 平面 + yuvI420.rowRange(0, h).copyTo(nv21.rowRange(0, h)); + + // 交叉合并 U 和 V + const uint8_t* uPlane = yuvI420.ptr(h); // U 在 h 行开始 + const uint8_t* vPlane = yuvI420.ptr(h + h / 4); /* 4: yuvI420,格式uv数据长度为原图的1/4 */ + + // 指向 NV21 的 UV 交叉区起始位置 + uint8_t* uvData = nv21.ptr(h); + + int uvPixelCount = (w * h) / 4; /* 4: yuvI420,格式uv数据长度为原图的1/4 */ + for (int i = 0; i < uvPixelCount; ++i) { + // NV21 顺序是 V, U, V, U... + *uvData++ = vPlane[i]; + *uvData++ = uPlane[i]; + } + + return nv21; +} + +bool FireDetectionPreprocess(std::vector& fileList, std::vector& tensorBufs, + std::vector& tensorDescs) +{ + LOG(INFO) << "Processing file num: " << fileList.size(); + // 处理每个图像 + std::vector imgSize = { YOLO_INPUT_SIZE, YOLO_INPUT_SIZE }; + for (size_t i = 0; i < fileList.size(); ++i) { + std::string imgPath = fileList[i]; + LOG(INFO) << "imgPath: " << imgPath; + cv::Mat im0 = cv::imread(imgPath); + + // 应用letterbox + cv::Mat processed = LetterBox(im0, cv::Size(imgSize[0], imgSize[1]), false); + + // BGR到nv21:反转通道顺序 + cv::Mat yuvImg = BGRToNV21(processed); + ReadImgToBufDpico(yuvImg, tensorDescs[i], tensorBufs[i]); + + // 保存为nv21 + std::string rawPath = "frame_" + std::to_string(i) + ".nv21"; + std::ofstream ofs(rawPath, std::ios::binary); + if (ofs.is_open()) { + // nv21 内存是连续的,直接写入所有数据 + ofs.write(reinterpret_cast(yuvImg.data), yuvImg.total() * yuvImg.elemSize()); + ofs.close(); + LOG(INFO) << "Saved raw NV21 data to: " << rawPath; + } + } + + LOG(INFO) << "PreProcessing completed successfully!"; + return true; +} +} // namespace Infer diff --git a/samples/samples_GPL/common/infer/preprocess/fire_detection_preprocess.h 
b/samples/samples_GPL/common/infer/preprocess/fire_detection_preprocess.h new file mode 100644 index 0000000000000000000000000000000000000000..7301db3c80ab324abee67c2b29f8534810690634 --- /dev/null +++ b/samples/samples_GPL/common/infer/preprocess/fire_detection_preprocess.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) ModelZoo. 2026-2026. All rights reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "model.h" + +namespace Infer { +bool FireDetectionPreprocess(std::vector& fileList, std::vector& tensorBufs, + std::vector& tensorDescs); +} diff --git a/samples/samples_GPL/opensource/opencv/lib/aarch64_610_linux/libopencv_world.so.412 b/samples/samples_GPL/opensource/opencv/lib/aarch64_610_linux/libopencv_world.so.412 new file mode 100644 index 0000000000000000000000000000000000000000..15ba7c0e53ea9204742b30424ec3309c6a5c6c52 Binary files /dev/null and b/samples/samples_GPL/opensource/opencv/lib/aarch64_610_linux/libopencv_world.so.412 differ